diff --git a/.cache b/.cache new file mode 120000 index 0000000000000000000000000000000000000000..310cb5b884182f2d345fc69e9a2d0ac9c97808d6 --- /dev/null +++ b/.cache @@ -0,0 +1 @@ +/home/javierr/.cache \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 755356a054893aa3dbdb5ae3fec6a00637e6e3ae..f0eef4b95ff394f9e69664d929bcd93ac1e80134 100644 --- a/.gitattributes +++ b/.gitattributes @@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zstandard filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.wandb filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..c7206fcce69b9f4554e467c8f4f832150fce187d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 40, + "": 39 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..260219f2064b3570984d97f765777fa82ba04ed6 --- /dev/null +++ b/config.json @@ -0,0 +1,109 @@ +{ + "activation_dropout": 0.055, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.094, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": true, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.04, + "feat_quantizer_dropout": 0.0, + "final_dropout": 
0.0, + "fuse_matmuls": false, + "gradient_checkpointing": true, + "hidden_act": "gelu", + "hidden_dropout": 0.047, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.041, + "mask_feature_length": 64, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.25, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.082, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 38, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 39, + "xvector_output_dim": 512 +} diff --git a/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 b/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..93fa8b058d4ac58879a0af8ae51f420c74ebcbac --- /dev/null +++ b/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded7b585446de921ebfe6f76acb1100484ca4c264a7b066ab3fa8904e6e4b6e6 +size 40 diff --git a/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 b/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..7627616f294c3e604616556f2cb1b1cebc135977 --- /dev/null +++ b/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a0e93ee6816853f3923399b5d527d2a61ca34ce887c28517df99c05fb20a7e6c +size 40 diff --git a/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 b/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..681c54c207e4fafd43f9203c926c7aa535aed99f --- /dev/null +++ b/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59e7c0f2f3d5c03effbc941690f4567f87bf74e6d559526bbd802d65dc2710a +size 40 diff --git a/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 b/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..178ec4b55c6a3c400d4cb788fe67460ad2bbf6f6 --- /dev/null +++ b/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f89384c846e08fa515da01d4c2459ae230a9c6702cc6ab9dc8533217d5d44e0 +size 40 diff --git a/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 b/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a7583e0765c1cd375165732a4331ce7139edbdd9 --- /dev/null +++ b/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac1687a5731ee3d8fe81c3a3a9cad3798a3cfd2de48cf0e700947ec667e5183 +size 40 diff --git a/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 b/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..1abd574967f1ff0677cdb12866af4911e4f357eb --- /dev/null +++ b/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c744a976d0f5879c6383cdbfc1c543b24f09469a2279b2297c378547f26f08c5 +size 40 diff --git a/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 b/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..3eccc090277cf26bca9f343fd708ab2f97485fdb --- /dev/null +++ b/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58c7b4399f1690c0cf07b003a6f00eda584715b170f4b5ba7ae5694af533262 +size 40 diff --git a/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 b/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a59deea18087557d3c0ae5631be4ab9dea9c26d2 --- /dev/null +++ b/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711a53c830c473bac46091e9ffea0b461decbed903088bb927b1c4a9331a21f9 +size 40 diff --git a/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 b/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..88ba3506d3063b9b9dc26c416e37b2849d03af6c --- /dev/null +++ b/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c9db04b6cba094a693bcea7d3e63f4af0128112bd11f0ca41d1dd0998cfc8a5 +size 40 diff --git a/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 b/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a5850b5d0e436aab17d3beac038e6add4550c2c7 --- /dev/null +++ b/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2abfac3540820545dabe683f72071edc7efb99f2d83d2bfd2a0ccd92f388ba2 +size 40 diff 
--git a/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 b/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..71a830ff2ca7502829d13ab32441ccedc40799a4 --- /dev/null +++ b/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a908c7234714cc88dd9694ee3edfa2051d03c68e9bf8d6ca30df43ae3bdb58ef +size 40 diff --git a/flax_model.msgpack b/flax_model.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c5466547ea09e27af43d81804418ed40885a941a --- /dev/null +++ b/flax_model.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0a8533250d85a6e1bccebd2a781b061e35d55e0a42a09491a2f29826c05a05 +size 3850218852 diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..254251fb5739afb6a196632e07dcee2334d03ed2 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,6 @@ +from models.configuration_bart import BartConfig +from models.configuration_wav2vec2 import Wav2Vec2Config +from models.configuration_speech_encoder_decoder import SpeechEncoderDecoderConfig +from models.modeling_flax_wav2vec2 import FlaxWav2Vec2Model, FlaxWav2Vec2Module, FlaxWav2Vec2ForCTC, FlaxWav2Vec2ForCTCModule +from models.modeling_flax_bart import FlaxBartForCausalLM, FlaxBartForCausalLMModule +from models.modeling_flax_speech_encoder_decoder import FlaxSpeechEncoderDecoderModel diff --git a/models/__pycache__/__init__.cpython-38.pyc b/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7207e33c1cc3f1de8c15cd91c251e5c42778332c Binary files /dev/null and b/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_bart.cpython-38.pyc b/models/__pycache__/configuration_bart.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..142557bbe55d3b6ee9e3cb7c74f0c78f8c93ff49 Binary files /dev/null and b/models/__pycache__/configuration_bart.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc b/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9081eba88eba38668dd664e832f0585be1ff75e3 Binary files /dev/null and b/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_wav2vec2.cpython-38.pyc b/models/__pycache__/configuration_wav2vec2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd52c2fbb1b2ab1ec86fda889821505491291fb1 Binary files /dev/null and b/models/__pycache__/configuration_wav2vec2.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_bart.cpython-38.pyc b/models/__pycache__/modeling_flax_bart.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..205602c2ff9def7cfbf501c35acc0f975f45b35e Binary files /dev/null and b/models/__pycache__/modeling_flax_bart.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc b/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..821aaee1f4bd6812725827fa307af3ea0789c0d9 Binary files /dev/null and b/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc b/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c143adbba5f614708fe32dcd8862aeeebb48999 Binary files /dev/null and b/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc differ diff --git a/models/configuration_bart.py b/models/configuration_bart.py new file mode 100644 
index 0000000000000000000000000000000000000000..45ab9edf6126bcfa2f73bb79e786f5ec2bff5d78 --- /dev/null +++ b/models/configuration_bart.py @@ -0,0 +1,183 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" BART model configuration""" +import warnings + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + +BART_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "facebook/bart-large": "https://huggingface.co/facebook/bart-large/resolve/main/config.json", + # See all BART models at https://huggingface.co/models?filter=bart +} + + +class BartConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`BartModel`]. It is used to instantiate a BART + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the BART + [facebook/bart-large](https://huggingface.co/facebook/bart-large) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 50265): + Vocabulary size of the BART model. 
Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`BartModel`] or [`TFBartModel`]. + d_model (`int`, *optional*, defaults to 1024): + Dimensionality of the layers and the pooler layer. + encoder_layers (`int`, *optional*, defaults to 12): + Number of encoder layers. + decoder_layers (`int`, *optional*, defaults to 12): + Number of decoder layers. + encoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer encoder. + decoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer decoder. + decoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + encoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + activation_function (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"silu"` and `"gelu_new"` are supported. + dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + activation_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for activations inside the fully connected layer. + classifier_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for classifier. + max_position_embeddings (`int`, *optional*, defaults to 1024): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). 
+ init_std (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + encoder_layerdrop: (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + decoder_layerdrop: (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + scale_embedding (`bool`, *optional*, defaults to `False`): + Scale embeddings by diving by sqrt(d_model). + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). + num_labels: (`int`, *optional*, defaults to 3): + The number of labels to use in [`BartForSequenceClassification`]. + forced_eos_token_id (`int`, *optional*, defaults to 2): + The id of the token to force as the last generated token when `max_length` is reached. Usually set to + `eos_token_id`. + use_scan (`bool`, *optional*, defaults to `False`): + Whether or not to use nn.scan in the Flax Bart attention layers. 
+ + Example: + + ```python + >>> from transformers import BartModel, BartConfig + + >>> # Initializing a BART facebook/bart-large style configuration + >>> configuration = BartConfig() + + >>> # Initializing a model from the facebook/bart-large style configuration + >>> model = BartModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + model_type = "bart" + keys_to_ignore_at_inference = ["past_key_values"] + attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} + + def __init__( + self, + vocab_size=50265, + max_position_embeddings=1024, + encoder_layers=12, + encoder_ffn_dim=4096, + encoder_attention_heads=16, + decoder_layers=12, + decoder_ffn_dim=4096, + decoder_attention_heads=16, + encoder_layerdrop=0.0, + decoder_layerdrop=0.0, + activation_function="gelu", + d_model=1024, + dropout=0.1, + attention_dropout=0.0, + activation_dropout=0.0, + init_std=0.02, + classifier_dropout=0.0, + scale_embedding=False, + use_cache=True, + use_scan=False, + fuse_matmuls=False, + num_labels=3, + pad_token_id=1, + bos_token_id=0, + eos_token_id=2, + is_encoder_decoder=True, + decoder_start_token_id=2, + forced_eos_token_id=2, + **kwargs + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.d_model = d_model + self.encoder_ffn_dim = encoder_ffn_dim + self.encoder_layers = encoder_layers + self.encoder_attention_heads = encoder_attention_heads + self.decoder_ffn_dim = decoder_ffn_dim + self.decoder_layers = decoder_layers + self.decoder_attention_heads = decoder_attention_heads + self.dropout = dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.activation_function = activation_function + self.init_std = init_std + self.encoder_layerdrop = encoder_layerdrop + self.decoder_layerdrop = decoder_layerdrop + self.classifier_dropout = classifier_dropout + self.use_cache = use_cache + 
self.use_scan = use_scan + self.fuse_matmuls = fuse_matmuls + self.num_hidden_layers = encoder_layers + self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True + + super().__init__( + num_labels=num_labels, + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + is_encoder_decoder=is_encoder_decoder, + decoder_start_token_id=decoder_start_token_id, + forced_eos_token_id=forced_eos_token_id, + **kwargs, + ) + + # ensure backward compatibility for BART CNN models + if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False): + self.forced_bos_token_id = self.bos_token_id + warnings.warn( + f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. " + "The config can simply be saved and uploaded again to be fixed." + ) diff --git a/models/configuration_speech_encoder_decoder.py b/models/configuration_speech_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a60be81a7088085c1d56b1d52c68f1bada283064 --- /dev/null +++ b/models/configuration_speech_encoder_decoder.py @@ -0,0 +1,121 @@ +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging +from models.configuration_wav2vec2 import Wav2Vec2Config +from models.configuration_bart import BartConfig +from transformers import AutoConfig + + +logger = logging.get_logger(__name__) + + +class SpeechEncoderDecoderConfig(PretrainedConfig): + r""" + [`SpeechEncoderDecoderConfig`] is the configuration class to store the configuration of a + [`SpeechEncoderDecoderModel`]. It is used to instantiate an Encoder Decoder model according to the specified + arguments, defining the encoder and decoder configs. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + kwargs (*optional*): + Dictionary of keyword arguments. Notably: + + - **encoder** ([`PretrainedConfig`], *optional*) -- An instance of a configuration object that defines + the encoder config. + - **decoder** ([`PretrainedConfig`], *optional*) -- An instance of a configuration object that defines + the decoder config. 
+ + Examples: + + ```python + >>> from transformers import BertConfig, Wav2Vec2Config, SpeechEncoderDecoderConfig, SpeechEncoderDecoderModel + + >>> # Initializing a Wav2Vec2 & BERT style configuration + >>> config_encoder = Wav2Vec2Config() + >>> config_decoder = BertConfig() + + >>> config = SpeechEncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder) + + >>> # Initializing a Wav2Vec2Bert model from a Wav2Vec2 & bert-base-uncased style configurations + >>> model = SpeechEncoderDecoderModel(config=config) + + >>> # Accessing the model configuration + >>> config_encoder = model.config.encoder + >>> config_decoder = model.config.decoder + >>> # set decoder config to causal lm + >>> config_decoder.is_decoder = True + >>> config_decoder.add_cross_attention = True + + >>> # Saving the model, including its configuration + >>> model.save_pretrained("my-model") + + >>> # loading model and config from pretrained folder + >>> encoder_decoder_config = SpeechEncoderDecoderConfig.from_pretrained("my-model") + >>> model = SpeechEncoderDecoderModel.from_pretrained("my-model", config=encoder_decoder_config) + ```""" + model_type = "speech-encoder-decoder" + is_composition = True + + def __init__(self, **kwargs): + super().__init__(**kwargs) + if "encoder" not in kwargs or "decoder" not in kwargs: + raise ValueError( + f"A configuraton of type {self.model_type} cannot be instantiated because not both `encoder` and `decoder` sub-configurations are passed, but only {kwargs}" + ) + + encoder_config = kwargs.pop("encoder") + decoder_config = kwargs.pop("decoder") + + # TODO: Load configs from AutoConfig (as done in Transformers 🤗) + self.encoder = Wav2Vec2Config(**encoder_config) + self.decoder = BartConfig(**decoder_config) + self.is_encoder_decoder = True + + @classmethod + def from_encoder_decoder_configs( + cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs + ) -> PretrainedConfig: + r""" + Instantiate a 
[`SpeechEncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model + configuration and decoder model configuration. + + Returns: + [`SpeechEncoderDecoderConfig`]: An instance of a configuration object + """ + logger.info("Setting `config.is_decoder=True` and `config.add_cross_attention=True` for decoder_config") + decoder_config.is_decoder = True + decoder_config.add_cross_attention = True + + return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict(), **kwargs) + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default *to_dict()* from *PretrainedConfig*. + + Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + output["encoder"] = self.encoder.to_dict() + output["decoder"] = self.decoder.to_dict() + output["model_type"] = self.__class__.model_type + return output diff --git a/models/configuration_wav2vec2.py b/models/configuration_wav2vec2.py new file mode 100644 index 0000000000000000000000000000000000000000..c93bc7e6df8d69fe5d8f0dce61ab5e590f7e361f --- /dev/null +++ b/models/configuration_wav2vec2.py @@ -0,0 +1,344 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Wav2Vec2 model configuration""" + +import functools +import operator + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + +WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json", + # See all Wav2Vec2 models at https://huggingface.co/models?filter=wav2vec2 +} + + +class Wav2Vec2Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`Wav2Vec2Model`]. It is used to instantiate an + Wav2Vec2 model according to the specified arguments, defining the model architecture. Instantiating a configuration + with the defaults will yield a similar configuration to that of the Wav2Vec2 + [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 32): + Vocabulary size of the Wav2Vec2 model. Defines the number of different tokens that can be represented by + the `inputs_ids` passed when calling [`Wav2Vec2Model`] or [`TFWav2Vec2Model`]. Vocabulary size of the + model. Defines the different tokens that can be represented by the *inputs_ids* passed to the forward + method of [`Wav2Vec2Model`]. + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. 
+ intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` are supported. + hidden_dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (`float`, *optional*, defaults to 0.1): + The dropout ratio for the attention probabilities. + final_dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for the final projection layer of [`Wav2Vec2ForCTC`]. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + layer_norm_eps (`float`, *optional*, defaults to 1e-12): + The epsilon used by the layer normalization layers. + feat_extract_norm (`str`, *optional*, defaults to `"group"`): + The norm to be applied to 1D convolutional layers in feature encoder. One of `"group"` for group + normalization of only the first 1D convolutional layer or `"layer"` for layer normalization of all 1D + convolutional layers. + feat_proj_dropout (`float`, *optional*, defaults to 0.0): + The dropout probability for output of the feature encoder. + feat_extract_activation (`str, `optional`, defaults to `"gelu"`): + The non-linear activation function (function or string) in the 1D convolutional layers of the feature + extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported. + feat_quantizer_dropout (`float`, *optional*, defaults to 0.0): + The dropout probabilitiy for quantized feature encoder states. 
+ conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`): + A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the + feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers. + conv_stride (`Tuple[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`): + A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length + of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*. + conv_kernel (`Tuple[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`): + A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The + length of *conv_kernel* defines the number of convolutional layers and has to match the length of + *conv_dim*. + conv_bias (`bool`, *optional*, defaults to `False`): + Whether the 1D convolutional layers have a bias. + num_conv_pos_embeddings (`int`, *optional*, defaults to 128): + Number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional + embeddings layer. + num_conv_pos_embedding_groups (`int`, *optional*, defaults to 16): + Number of groups of 1D convolutional positional embeddings layer. + do_stable_layer_norm (`bool`, *optional*, defaults to `False`): + Whether to apply *stable* layer norm architecture of the Transformer encoder. `do_stable_layer_norm is + True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is + False` corresponds to applying layer norm after the attention layer. + apply_spec_augment (`bool`, *optional*, defaults to `True`): + Whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see + [SpecAugment: A Simple Data Augmentation Method for Automatic Speech + Recognition](https://arxiv.org/abs/1904.08779). 
+ mask_time_prob (`float`, *optional*, defaults to 0.05): + Percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking + procecure generates ''mask_time_prob*len(time_axis)/mask_time_length'' independent masks over the axis. If + reasoning from the propability of each feature vector to be chosen as the start of the vector span to be + masked, *mask_time_prob* should be `prob_vector_start*mask_time_length`. Note that overlap may decrease the + actual percentage of masked vectors. This is only relevant if `apply_spec_augment is True`. + mask_time_length (`int`, *optional*, defaults to 10): + Length of vector span along the time axis. + mask_time_min_masks (`int`, *optional*, defaults to 2),: + The minimum number of masks of length `mask_feature_length` generated along the time axis, each time step, + irrespectively of `mask_feature_prob`. Only relevant if ''mask_time_prob*len(time_axis)/mask_time_length < + mask_time_min_masks'' + mask_feature_prob (`float`, *optional*, defaults to 0.0): + Percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The + masking procecure generates ''mask_feature_prob*len(feature_axis)/mask_time_length'' independent masks over + the axis. If reasoning from the propability of each feature vector to be chosen as the start of the vector + span to be masked, *mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap + may decrease the actual percentage of masked vectors. This is only relevant if `apply_spec_augment is + True`. + mask_feature_length (`int`, *optional*, defaults to 10): + Length of vector span along the feature axis. + mask_feature_min_masks (`int`, *optional*, defaults to 0),: + The minimum number of masks of length `mask_feature_length` generated along the feature axis, each time + step, irrespectively of `mask_feature_prob`. 
Only relevant if + ''mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks'' + num_codevectors_per_group (`int`, *optional*, defaults to 320): + Number of entries in each quantization codebook (group). + num_codevector_groups (`int`, *optional*, defaults to 2): + Number of codevector groups for product codevector quantization. + contrastive_logits_temperature (`float`, *optional*, defaults to 0.1): + The temperature *kappa* in the contrastive loss. + feat_quantizer_dropout (`float`, *optional*, defaults to 0.0): + The dropout probability for the output of the feature encoder that's used by the quantizer. + num_negatives (`int`, *optional*, defaults to 100): + Number of negative samples for the contrastive loss. + codevector_dim (`int`, *optional*, defaults to 256): + Dimensionality of the quantized feature vectors. + proj_codevector_dim (`int`, *optional*, defaults to 256): + Dimensionality of the final projection of both the quantized and the transformer features. + diversity_loss_weight (`float`, *optional*, defaults to 0.1): + The weight of the codebook diversity loss component. + ctc_loss_reduction (`str`, *optional*, defaults to `"sum"`): + Specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training an + instance of [`Wav2Vec2ForCTC`]. + ctc_zero_infinity (`bool`, *optional*, defaults to `False`): + Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. Infinite losses mainly + occur when the inputs are too short to be aligned to the targets. Only relevant when training an instance + of [`Wav2Vec2ForCTC`]. + use_weighted_layer_sum (`bool`, *optional*, defaults to `False`): + Whether to use a weighted average of layer outputs with learned weights. Only relevant when using an + instance of [`Wav2Vec2ForSequenceClassification`]. + classifier_proj_size (`int`, *optional*, defaults to 256): + Dimensionality of the projection before token mean-pooling for classification. 
+ tdnn_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 1500)`): + A tuple of integers defining the number of output channels of each 1D convolutional layer in the *TDNN* + module of the *XVector* model. The length of *tdnn_dim* defines the number of *TDNN* layers. + tdnn_kernel (`Tuple[int]`, *optional*, defaults to `(5, 3, 3, 1, 1)`): + A tuple of integers defining the kernel size of each 1D convolutional layer in the *TDNN* module of the + *XVector* model. The length of *tdnn_kernel* has to match the length of *tdnn_dim*. + tdnn_dilation (`Tuple[int]`, *optional*, defaults to `(1, 2, 3, 1, 1)`): + A tuple of integers defining the dilation factor of each 1D convolutional layer in *TDNN* module of the + *XVector* model. The length of *tdnn_dilation* has to match the length of *tdnn_dim*. + xvector_output_dim (`int`, *optional*, defaults to 512): + Dimensionality of the *XVector* embedding vectors. + add_adapter (`bool`, *optional*, defaults to `False`): + Whether a convolutional network should be stacked on top of the Wav2Vec2 Encoder. Can be very useful for + warm-starting Wav2Vec2 for SpeechEncoderDecoder models. + adapter_kernel_size (`int`, *optional*, defaults to 3): + Kernel size of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`. + adapter_stride (`int`, *optional*, defaults to 2): + Stride of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`. + num_adapter_layers (`int`, *optional*, defaults to 3): + Number of convolutional layers that should be used in the adapter network. Only relevant if `add_adapter is + True`. + output_hidden_size (`int`, *optional*): + Dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*. Only relevant + if `add_adapter is True`. + use_scan (`bool`, *optional*, defaults to `False`): + Whether or not to use nn.scan in the Flax Wav2Vec2 transformer layers. 
+ + Example: + + ```python + >>> from transformers import Wav2Vec2Model, Wav2Vec2Config + + >>> # Initializing a Wav2Vec2 facebook/wav2vec2-base-960h style configuration + >>> configuration = Wav2Vec2Config() + + >>> # Initializing a model from the facebook/wav2vec2-base-960h style configuration + >>> model = Wav2Vec2Model(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + model_type = "wav2vec2" + + def __init__( + self, + vocab_size=32, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout=0.1, + activation_dropout=0.1, + attention_dropout=0.1, + feat_proj_dropout=0.0, + feat_quantizer_dropout=0.0, + final_dropout=0.1, + layerdrop=0.1, + initializer_range=0.02, + layer_norm_eps=1e-5, + feat_extract_norm="group", + feat_extract_activation="gelu", + conv_dim=(512, 512, 512, 512, 512, 512, 512), + conv_stride=(5, 2, 2, 2, 2, 2, 2), + conv_kernel=(10, 3, 3, 3, 3, 2, 2), + conv_bias=False, + num_conv_pos_embeddings=128, + num_conv_pos_embedding_groups=16, + do_stable_layer_norm=False, + apply_spec_augment=True, + mask_time_prob=0.05, + mask_time_length=10, + mask_time_min_masks=2, + mask_feature_prob=0.0, + mask_feature_length=10, + mask_feature_min_masks=0, + num_codevectors_per_group=320, + num_codevector_groups=2, + contrastive_logits_temperature=0.1, + num_negatives=100, + codevector_dim=256, + proj_codevector_dim=256, + diversity_loss_weight=0.1, + ctc_loss_reduction="sum", + ctc_zero_infinity=False, + use_weighted_layer_sum=False, + classifier_proj_size=256, + tdnn_dim=(512, 512, 512, 512, 1500), + tdnn_kernel=(5, 3, 3, 1, 1), + tdnn_dilation=(1, 2, 3, 1, 1), + xvector_output_dim=512, + pad_token_id=0, + bos_token_id=1, + eos_token_id=2, + add_adapter=False, + adapter_kernel_size=3, + adapter_stride=2, + num_adapter_layers=3, + output_hidden_size=None, + use_scan=False, + fuse_matmuls=False, + **kwargs + ): + 
super().__init__(**kwargs, pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id) + self.hidden_size = hidden_size + self.feat_extract_norm = feat_extract_norm + self.feat_extract_activation = feat_extract_activation + self.conv_dim = list(conv_dim) + self.conv_stride = list(conv_stride) + self.conv_kernel = list(conv_kernel) + self.conv_bias = conv_bias + self.num_conv_pos_embeddings = num_conv_pos_embeddings + self.num_conv_pos_embedding_groups = num_conv_pos_embedding_groups + self.num_feat_extract_layers = len(self.conv_dim) + self.num_hidden_layers = num_hidden_layers + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.num_attention_heads = num_attention_heads + self.hidden_dropout = hidden_dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.feat_proj_dropout = feat_proj_dropout + self.final_dropout = final_dropout + self.layerdrop = layerdrop + self.layer_norm_eps = layer_norm_eps + self.initializer_range = initializer_range + self.vocab_size = vocab_size + self.do_stable_layer_norm = do_stable_layer_norm + self.use_weighted_layer_sum = use_weighted_layer_sum + self.use_scan = use_scan + self.fuse_matmuls = fuse_matmuls + + if ( + (len(self.conv_stride) != self.num_feat_extract_layers) + or (len(self.conv_kernel) != self.num_feat_extract_layers) + or (len(self.conv_dim) != self.num_feat_extract_layers) + ): + raise ValueError( + "Configuration for convolutional layers is incorrect. " + "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " + f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " + f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." 
+ ) + + # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 + self.apply_spec_augment = apply_spec_augment + self.mask_time_prob = mask_time_prob + self.mask_time_length = mask_time_length + self.mask_time_min_masks = mask_time_min_masks + self.mask_feature_prob = mask_feature_prob + self.mask_feature_length = mask_feature_length + self.mask_feature_min_masks = mask_feature_min_masks + + # parameters for pretraining with codevector quantized representations + self.num_codevectors_per_group = num_codevectors_per_group + self.num_codevector_groups = num_codevector_groups + self.contrastive_logits_temperature = contrastive_logits_temperature + self.feat_quantizer_dropout = feat_quantizer_dropout + self.num_negatives = num_negatives + self.codevector_dim = codevector_dim + self.proj_codevector_dim = proj_codevector_dim + self.diversity_loss_weight = diversity_loss_weight + + # ctc loss + self.ctc_loss_reduction = ctc_loss_reduction + self.ctc_zero_infinity = ctc_zero_infinity + + # adapter + self.add_adapter = add_adapter + self.adapter_kernel_size = adapter_kernel_size + self.adapter_stride = adapter_stride + self.num_adapter_layers = num_adapter_layers + self.output_hidden_size = output_hidden_size or hidden_size + + # SequenceClassification-specific parameter. Feel free to ignore for other classes. + self.classifier_proj_size = classifier_proj_size + + # XVector-specific parameters. Feel free to ignore for other classes. 
+ self.tdnn_dim = list(tdnn_dim) + self.tdnn_kernel = list(tdnn_kernel) + self.tdnn_dilation = list(tdnn_dilation) + self.xvector_output_dim = xvector_output_dim + + @property + def inputs_to_logits_ratio(self): + return functools.reduce(operator.mul, self.conv_stride, 1) diff --git a/models/modeling_flax_bart.py b/models/modeling_flax_bart.py new file mode 100644 index 0000000000000000000000000000000000000000..08f77b24bf09be356713bbbc77955ec5d1da3b1b --- /dev/null +++ b/models/modeling_flax_bart.py @@ -0,0 +1,816 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The Google Flax Team Authors And The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Flax Bart model.""" + +import math +import random +from functools import partial +from typing import Optional, Tuple + +import numpy as np + +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict, unfreeze +from flax.linen import combine_masks, make_causal_mask +from flax.linen import partitioning as nn_partitioning +from flax.linen.attention import dot_product_attention_weights +from jax import lax +from jax.random import PRNGKey + +from transformers.modeling_flax_outputs import ( + FlaxBaseModelOutputWithPastAndCrossAttentions, + FlaxCausalLMOutputWithCrossAttentions, +) +from transformers.modeling_flax_utils import ACT2FN, FlaxPreTrainedModel + +from models import BartConfig + + +scan_with_axes = nn_partitioning.scan_with_axes +remat = nn_partitioning.remat + + +def shift_tokens_right(input_ids: np.array, pad_token_id: int, decoder_start_token_id: int) -> np.ndarray: + """ + Shift input ids one token to the right. + """ + shifted_input_ids = np.zeros_like(input_ids) + shifted_input_ids[:, 1:] = input_ids[:, :-1] + shifted_input_ids[:, 0] = decoder_start_token_id + + shifted_input_ids = np.where(shifted_input_ids == -100, pad_token_id, shifted_input_ids) + return shifted_input_ids + + +class FlaxBartAttention(nn.Module): + config: BartConfig + embed_dim: int + num_heads: int + dropout: float = 0.0 + causal: bool = False + bias: bool = True + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self) -> None: + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}" + f" and `num_heads`: {self.num_heads})." 
) + + dense = partial( + nn.Dense, + self.embed_dim, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.q_proj, self.k_proj, self.v_proj = dense(), dense(), dense() + + self.fused_proj = nn.Dense( + self.embed_dim * 3, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.fused_key_value = nn.Dense( + self.embed_dim * 2, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.out_proj = dense() + + self.dropout_layer = nn.Dropout(rate=self.dropout) + + if self.causal: + self.causal_mask = make_causal_mask( + jnp.ones((1, self.config.max_position_embeddings), dtype="bool"), dtype="bool" + ) + + def _split_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.num_heads, self.head_dim)) + + def _merge_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,)) + + @nn.compact + def _concatenate_to_cache(self, key, value, query, attention_mask): + """ + This function takes projected key, value states from a single input token and concatenates the states to cached + states from previous steps. This function is slightly adapted from the official Flax repository: + https://github.com/google/flax/blob/491ce18759622506588784b4fca0e4bf05f8c8cd/flax/linen/attention.py#L252 + """ + # detect if we're initializing by absence of existing cache data. 
+ is_initialized = self.has_variable("cache", "cached_key") + cached_key = self.variable("cache", "cached_key", jnp.zeros, key.shape, key.dtype) + cached_value = self.variable("cache", "cached_value", jnp.zeros, value.shape, value.dtype) + cache_index = self.variable("cache", "cache_index", lambda: jnp.array(0, dtype=jnp.int32)) + + if is_initialized: + *batch_dims, max_length, num_heads, depth_per_head = cached_key.value.shape + # update key, value caches with our new 1d spatial slices + cur_index = cache_index.value + indices = (0,) * len(batch_dims) + (cur_index, 0, 0) + key = lax.dynamic_update_slice(cached_key.value, key, indices) + value = lax.dynamic_update_slice(cached_value.value, value, indices) + cached_key.value = key + cached_value.value = value + num_updated_cache_vectors = query.shape[1] + cache_index.value = cache_index.value + num_updated_cache_vectors + # causal mask for cached decoder self-attention: our single query position should only attend to those key positions that have already been generated and cached, not the remaining zero elements. 
+ pad_mask = jnp.broadcast_to( + jnp.arange(max_length) < cur_index + num_updated_cache_vectors, + tuple(batch_dims) + (1, num_updated_cache_vectors, max_length), + ) + attention_mask = combine_masks(pad_mask, attention_mask) + return key, value, attention_mask + + def __call__( + self, + hidden_states: jnp.ndarray, + key_value_states: Optional[jnp.ndarray] = None, + attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + """Input shape: Batch x Time x Channel""" + + # if key_value_states are provided this layer is used as a cross-attention layer + # for the decoder + is_cross_attention = key_value_states is not None + batch_size = hidden_states.shape[0] + + if self.config.fuse_matmuls: + # get key, value proj + if is_cross_attention: + # get query proj + query_states = self.q_proj(hidden_states) + # cross_attentions + attention_states = self.fused_key_value(key_value_states) + key_states, value_states = jnp.split(attention_states, 2, axis=-1) + else: + attention_states = self.fused_proj(hidden_states) + query_states, key_states, value_states = jnp.split(attention_states, 3, axis=-1) + + else: + # get query proj + query_states = self.q_proj(hidden_states) + # get key, value proj + if is_cross_attention: + # cross_attentions + key_states = self.k_proj(key_value_states) + value_states = self.v_proj(key_value_states) + else: + # self_attention + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = self._split_heads(query_states) + key_states = self._split_heads(key_states) + value_states = self._split_heads(value_states) + + # handle cache prepare causal attention mask + if self.causal: + query_length, key_length = query_states.shape[1], key_states.shape[1] + if self.has_variable("cache", "cached_key"): + mask_shift = self.variables["cache"]["cache_index"] + max_decoder_length = self.variables["cache"]["cached_key"].shape[1] + causal_mask = 
lax.dynamic_slice( + self.causal_mask, (0, 0, mask_shift, 0), (1, 1, query_length, max_decoder_length) + ) + else: + causal_mask = self.causal_mask[:, :, :query_length, :key_length] + causal_mask = jnp.broadcast_to(causal_mask, (batch_size,) + causal_mask.shape[1:]) + + # combine masks if needed + if attention_mask is not None and self.causal: + attention_mask = jnp.broadcast_to(jnp.expand_dims(attention_mask, axis=(-3, -2)), causal_mask.shape) + attention_mask = combine_masks(attention_mask, causal_mask) + elif self.causal: + attention_mask = causal_mask + elif attention_mask is not None: + attention_mask = jnp.expand_dims(attention_mask, axis=(-3, -2)) + + # During fast autoregressive decoding, we feed one position at a time, + # and cache the keys and values step by step. + if self.causal and (self.has_variable("cache", "cached_key") or init_cache): + key_states, value_states, attention_mask = self._concatenate_to_cache( + key_states, value_states, query_states, attention_mask + ) + + # Convert the boolean attention mask to an attention bias. 
+ if attention_mask is not None: + # attention mask in the form of attention bias + attention_bias = lax.select( + attention_mask > 0, + jnp.full(attention_mask.shape, 0.0).astype(self.dtype), + jnp.full(attention_mask.shape, float("-inf")).astype(self.dtype), + ) + else: + attention_bias = None + + dropout_rng = None + if not deterministic and self.dropout > 0.0: + dropout_rng = self.make_rng("dropout") + + attn_weights = dot_product_attention_weights( + query_states, + key_states, + bias=attention_bias, + dropout_rng=dropout_rng, + dropout_rate=self.dropout, + broadcast_dropout=True, + deterministic=deterministic, + dtype=self.dtype, + precision=None, + ) + + attn_output = jnp.einsum("...hqk,...khd->...qhd", attn_weights, value_states) + attn_output = self._merge_heads(attn_output) + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + + +class FlaxBartDecoderLayer(nn.Module): + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self) -> None: + self.embed_dim = self.config.d_model + self.self_attn = FlaxBartAttention( + config=self.config, + embed_dim=self.embed_dim, + num_heads=self.config.decoder_attention_heads, + dropout=self.config.attention_dropout, + causal=True, + dtype=self.dtype, + ) + self.dropout_layer = nn.Dropout(rate=self.config.dropout) + self.activation_fn = ACT2FN[self.config.activation_function] + self.activation_dropout_layer = nn.Dropout(rate=self.config.activation_dropout) + + self.self_attn_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + self.encoder_attn = FlaxBartAttention( + config=self.config, + embed_dim=self.embed_dim, + num_heads=self.config.decoder_attention_heads, + dropout=self.config.attention_dropout, + dtype=self.dtype, + ) + self.encoder_attn_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + self.fc1 = nn.Dense( + self.config.encoder_ffn_dim, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + self.fc2 = nn.Dense( + 
self.embed_dim, dtype=self.dtype, kernel_init=jax.nn.initializers.normal(self.config.init_std) + ) + self.final_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + + def __call__( + self, + hidden_states: jnp.ndarray, + attention_mask: jnp.ndarray, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = True, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + + if self.config.use_scan: + hidden_states = hidden_states[0] + + residual = hidden_states + + # Self Attention + hidden_states, self_attn_weights = self.self_attn( + hidden_states=hidden_states, attention_mask=attention_mask, init_cache=init_cache + ) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.self_attn_layer_norm(hidden_states) + + # Cross-Attention Block + cross_attn_weights = None + if encoder_hidden_states is not None: + residual = hidden_states + + hidden_states, cross_attn_weights = self.encoder_attn( + hidden_states=hidden_states, + key_value_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + ) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.encoder_attn_layer_norm(hidden_states) + + # Fully Connected + residual = hidden_states + hidden_states = self.activation_fn(self.fc1(hidden_states)) + hidden_states = self.activation_dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = self.fc2(hidden_states) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.final_layer_norm(hidden_states) + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights, cross_attn_weights) + + if self.config.use_scan: + outputs = (outputs, 
None) + + return outputs + + +class FlaxBartDecoderLayerCollection(nn.Module): + config: BartConfig + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + @nn.compact + def __call__( + self, + hidden_states, + attention_mask, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + deterministic: bool = True, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + ): + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None + + num_decoder_layers = self.config.decoder_layers + BlockDecoderLayer = ( + remat( + FlaxBartDecoderLayer, + static_argnums=(4, 5, 6), + prevent_cse=not self.config.use_scan, + ) + if self.config.gradient_checkpointing + else FlaxBartDecoderLayer + ) + + if self.config.use_scan: + # since all decoder layers are the same, we use nn.scan directly + assert not output_attentions, "cannot use `scan` with `output_attentions` set to `True`" + assert not output_hidden_states, "cannot use `scan` with `output_hidden_states` set to `True`" + hidden_states = (hidden_states,) + + # TODO: add layerdrop in checkpointed scan (note: default value for layerdrop in config is zero) + hidden_states, _ = scan_with_axes( + BlockDecoderLayer, + variable_axes={"params": 0, "cache": 0}, + split_rngs={"params": True, "dropout": True}, + in_axes=(nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast), + length=num_decoder_layers, + )(self.config, dtype=self.dtype, name="FlaxBartDecoderLayers")( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + init_cache, + output_attentions, + deterministic, + ) + hidden_states = hidden_states[0] + + else: + for layer in range(num_decoder_layers): + if 
output_hidden_states: + all_hidden_states += (hidden_states,) + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + dropout_probability = random.uniform(0, 1) + if not deterministic and (dropout_probability < self.config.decoder_layerdrop): + layer_outputs = (None, None, None) + else: + layer_outputs = BlockDecoderLayer(self.config, dtype=self.dtype, name=str(layer),)( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + init_cache, + output_attentions, + deterministic, + ) + + hidden_states = layer_outputs[0] + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if encoder_hidden_states is not None: + all_cross_attentions += (layer_outputs[2],) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + outputs = [hidden_states, all_hidden_states, all_self_attns, all_cross_attentions] + + if not return_dict: + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=hidden_states, + hidden_states=all_hidden_states, + attentions=all_self_attns, + cross_attentions=all_cross_attentions, + ) + + +class FlaxBartDecoder(nn.Module): + config: BartConfig + embed_tokens: nn.Embed + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self): + self.dropout_layer = nn.Dropout(rate=self.config.dropout) + + embed_dim = self.config.d_model + self.padding_idx = self.config.pad_token_id + self.max_target_positions = self.config.max_position_embeddings + self.embed_scale = math.sqrt(self.config.d_model) if self.config.scale_embedding else 1.0 + + # Bart is set up so that if padding_idx is specified then offset the embedding ids by 2 + # and adjust num_embeddings appropriately. 
Other models don't have this hack + self.offset = 2 + self.embed_positions = nn.Embed( + self.config.max_position_embeddings + self.offset, + embed_dim, + embedding_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.layers = FlaxBartDecoderLayerCollection(self.config, self.dtype) + self.layernorm_embedding = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + + def __call__( + self, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + deterministic: bool = True, + ): + input_shape = input_ids.shape + input_ids = input_ids.reshape(-1, input_shape[-1]) + + inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale + + # embed positions + positions = self.embed_positions(position_ids + self.offset) + + hidden_states = inputs_embeds + positions + hidden_states = self.layernorm_embedding(hidden_states) + + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + + outputs = self.layers( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + deterministic=deterministic, + init_cache=init_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + if not return_dict: + return outputs + + return FlaxBaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=outputs.last_hidden_state, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + ) + + +class FlaxBartDecoderPreTrainedModel(FlaxPreTrainedModel): + config_class = BartConfig + base_model_prefix: str = "model" + module_class: nn.Module = None + + def __init__( + self, + config: BartConfig, + input_shape: Tuple[int] = (1, 1), + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + 
_do_init: bool = True, + **kwargs + ): + config.is_decoder = True + config.is_encoder_decoder = False + module = self.module_class(config=config, dtype=dtype, **kwargs) + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + # init input tensors + input_ids = jnp.zeros(input_shape, dtype="i4") + attention_mask = jnp.ones_like(input_ids) + + batch_size, sequence_length = input_ids.shape + position_ids = jnp.broadcast_to(jnp.arange(sequence_length)[None, :], (batch_size, sequence_length)) + + params_rng, dropout_rng = jax.random.split(rng) + rngs = {"params": params_rng, "dropout": dropout_rng} + encoder_hidden_states = jnp.zeros(input_shape + (self.config.d_model,)) + encoder_attention_mask = attention_mask + module_init_outputs = self.module.init( + rngs, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states, + encoder_attention_mask, + return_dict=False, + ) + return module_init_outputs["params"] + + def init_cache(self, batch_size, max_length): + r""" + Args: + batch_size (`int`): + batch_size used for fast auto-regressive decoding. Defines the batch size of the initialized cache. + max_length (`int`): + maximum possible length for auto-regressive decoding. Defines the sequence length of the initialized + cache. 
+ """ + # init input variables to retrieve cache + input_ids = jnp.ones((batch_size, max_length), dtype="i4") + attention_mask = jnp.ones_like(input_ids, dtype="i4") + position_ids = jnp.broadcast_to(jnp.arange(jnp.atleast_2d(input_ids).shape[-1]), input_ids.shape) + + init_variables = self.module.init( + jax.random.PRNGKey(0), input_ids, attention_mask, position_ids, return_dict=False, init_cache=True + ) + return unfreeze(init_variables["cache"]) + + def __call__( + self, + input_ids: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + position_ids: Optional[jnp.ndarray] = None, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + params: dict = None, + past_key_values: dict = None, + dropout_rng: PRNGKey = None, + ): + """ + Args: + input_ids (`jnp.ndarray` of shape `(target_batch_size, target_sequence_length)`): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`BartTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + For translation and summarization training, `decoder_input_ids` should be provided. If no + `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right + for denoising pre-training following the paper. + attention_mask (`jnp.ndarray` of shape `(target_batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + + If you want to change padding behavior, you should modify to your needs. See diagram 1 in [the + paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. 
+ position_ids (`numpy.ndarray` of shape `(target_batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.max_position_embeddings - 1]`. + encoder_hidden_states (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`): + A sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. + encoder_attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + past_key_values (`Dict[str, np.ndarray]`, *optional*, returned by `init_cache` or when passing previous `past_key_values`): + Dictionary of pre-computed hidden-states (key and values in the attention blocks) that can be used for fast + auto-regressive decoding. Pre-computed key and value hidden-states are of shape *[batch_size, max_length]*. 
+ """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if encoder_hidden_states is not None and encoder_attention_mask is None: + batch_size, sequence_length = encoder_hidden_states.shape[:2] + encoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + # prepare decoder inputs + if attention_mask is None: + attention_mask = jnp.ones_like(input_ids) + if position_ids is None: + batch_size, sequence_length = input_ids.shape + position_ids = jnp.broadcast_to(jnp.arange(sequence_length)[None, :], (batch_size, sequence_length)) + + # Handle any PRNG if needed + rngs = {"dropout": dropout_rng} if dropout_rng is not None else {} + + inputs = {"params": params or self.params} + + # if past_key_values are passed then cache is already initialized a private flag init_cache has to be passed + # down to ensure cache is used. 
It has to be made sure that cache is marked as mutable so that it can be + # changed by FlaxBartAttention module + if past_key_values: + inputs["cache"] = past_key_values + mutable = ["cache"] + else: + mutable = False + + outputs = self.module.apply( + inputs, + input_ids=jnp.array(input_ids, dtype="i4"), + attention_mask=jnp.array(attention_mask, dtype="i4"), + position_ids=jnp.array(position_ids, dtype="i4"), + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=not train, + rngs=rngs, + mutable=mutable, + ) + + # add updated cache to model output + if past_key_values is not None and return_dict: + outputs, past_key_values = outputs + outputs["past_key_values"] = unfreeze(past_key_values["cache"]) + return outputs + elif past_key_values is not None and not return_dict: + outputs, past_key_values = outputs + outputs = outputs[:1] + (unfreeze(past_key_values["cache"]),) + outputs[1:] + + return outputs + + +class FlaxBartDecoderWrapper(nn.Module): + """ + This wrapper class is a helper class to correctly load pretrained checkpoints when the causal language model is + used in combination with the [`EncoderDecoderModel`] framework. + """ + + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + embed_dim = self.config.d_model + embed_tokens = nn.Embed( + self.config.vocab_size, + embed_dim, + embedding_init=jax.nn.initializers.normal(self.config.init_std), + ) + self.decoder = FlaxBartDecoder(config=self.config, embed_tokens=embed_tokens, dtype=self.dtype) + + def __call__(self, *args, **kwargs): + return self.decoder(*args, **kwargs) + + +class FlaxBartForCausalLMModule(nn.Module): + """Bart Decoder Module with a language modeling head on top (linear layer with weights tied to the input embeddings) + e.g. for autoregressive tasks. 
+ """ + + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.model = FlaxBartDecoderWrapper(config=self.config, dtype=self.dtype) + self.lm_head = nn.Dense( + self.config.vocab_size, + use_bias=False, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + def __call__( + self, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + deterministic: bool = True, + ): + + outputs = self.model( + input_ids, + attention_mask, + position_ids, + encoder_hidden_states, + encoder_attention_mask, + deterministic=deterministic, + init_cache=init_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + hidden_states = outputs[0] + + if self.config.tie_word_embeddings: + shared_embedding = self.model.variables["params"]["decoder"]["embed_tokens"]["embedding"] + lm_logits = self.lm_head.apply({"params": {"kernel": shared_embedding.T}}, hidden_states) + else: + lm_logits = self.lm_head(hidden_states) + + if not return_dict: + return (lm_logits,) + outputs[1:] + + return FlaxCausalLMOutputWithCrossAttentions( + logits=lm_logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + ) + + +class FlaxBartForCausalLM(FlaxBartDecoderPreTrainedModel): + """Bart Decoder Model with a language modeling head on top (linear layer with weights tied to the input embeddings) + e.g. for autoregressive tasks. 
+ """ + + module_class = FlaxBartForCausalLMModule + + def prepare_inputs_for_generation(self, input_ids, max_length, attention_mask: Optional[jnp.DeviceArray] = None): + # initializing the cache + batch_size, seq_length = input_ids.shape + + past_key_values = self.init_cache(batch_size, max_length) + # Note that usually one would have to put 0's in the attention_mask for x > input_ids.shape[-1] and x < cache_length. + # But since the decoder uses a causal mask, those positions are masked anyway. + # Thus, we can create a single static attention_mask here, which is more efficient for compilation + extended_attention_mask = jnp.ones((batch_size, max_length), dtype="i4") + if attention_mask is not None: + position_ids = attention_mask.cumsum(axis=-1) - 1 + extended_attention_mask = lax.dynamic_update_slice(extended_attention_mask, attention_mask, (0, 0)) + else: + position_ids = jnp.broadcast_to(jnp.arange(seq_length, dtype="i4")[None, :], (batch_size, seq_length)) + + return { + "past_key_values": past_key_values, + "attention_mask": extended_attention_mask, + "position_ids": position_ids, + } + + def update_inputs_for_generation(self, model_outputs, model_kwargs): + model_kwargs["past_key_values"] = model_outputs.past_key_values + model_kwargs["position_ids"] = model_kwargs["position_ids"][:, -1:] + 1 + return model_kwargs diff --git a/models/modeling_flax_speech_encoder_decoder.py b/models/modeling_flax_speech_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc78e7fd3cc5aaea20839c491a23b918ac30eaf --- /dev/null +++ b/models/modeling_flax_speech_encoder_decoder.py @@ -0,0 +1,1245 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Classes to support Flax Speech-Encoder-Decoder architectures""" + +import os +from functools import partial +from typing import Optional, Tuple, Union, Dict + +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict, unfreeze +from jax import lax +from jax.random import PRNGKey +import numpy as np + +from transformers.modeling_flax_outputs import FlaxBaseModelOutput, FlaxCausalLMOutputWithCrossAttentions, FlaxSeq2SeqLMOutput +from transformers.modeling_flax_utils import FlaxPreTrainedModel +from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings, ModelOutput +from transformers.generation_flax_utils import FlaxLogitsProcessorList +from models import ( + FlaxWav2Vec2Model, + FlaxWav2Vec2Module, + FlaxBartForCausalLM, + FlaxBartForCausalLMModule, + BartConfig, + Wav2Vec2Config, + SpeechEncoderDecoderConfig, +) + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "SpeechEncoderDecoderConfig" + +SPEECH_ENCODER_DECODER_START_DOCSTRING = r""" + This class can be used to initialize a speech-sequence-to-text-sequence model with any pretrained speech + autoencoding model as the encoder and any pretrained text autoregressive model as the decoder. The encoder is + loaded via [`~AutoModel.from_pretrained`] function and the decoder is loaded via + [`~AutoModelForCausalLM.from_pretrained`] function. 
Cross-attention layers are automatically added to the decoder + and should be fine-tuned on a downstream generative task, like summarization. + + The effectiveness of initializing sequence-to-sequence models with pretrained checkpoints for sequence generation + tasks was shown in [Leveraging Pre-trained Checkpoints for Sequence Generation + Tasks](https://arxiv.org/abs/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn. Michael Matena, Yanqi + Zhou, Wei Li, Peter J. Liu. + + Additionally, in [Large-Scale Self- and Semi-Supervised Learning for Speech + Translation](https://arxiv.org/abs/2104.06678) it is shown how leveraging large pretrained speech models for speech + translation yields a significant performance improvement. + + After such an Speech-Encoder Decoder model has been trained/fine-tuned, it can be saved/loaded just like any other + models (see the examples for more information). + + This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a Flax Linen + [flax.nn.Module](https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html) subclass. Use it as a + regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. + + Parameters: + config ([`SpeechEncoderDecoderConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights. + dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`): + The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and + `jax.numpy.bfloat16` (on TPUs). 
+ + This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If + specified all the computation will be performed with the given `dtype`. + + **Note that this only specifies the dtype of the computation and does not influence the dtype of model + parameters.** + + If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and + [`~FlaxPreTrainedModel.to_bf16`]. +""" + +SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r""" + Args: + inputs (`jnp.ndarray` of shape `(batch_size, sequence_length)` or `(batch_size, sequence_length, feature_dim)`, *optional*): + Float values of input raw speech waveform or speech features. Values can be obtained by loading a *.flac* + or *.wav* audio file into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile + library (*pip install soundfile*). To prepare the array into *inputs*, either the [`Wav2Vec2Processor`] or + [`Speech2TextProcessor`] should be used for padding and conversion into a tensor of type + *torch.FloatTensor*. + attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + decoder_input_ids (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + + If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see + `past_key_values`). + + For sequence to sequence training, `decoder_input_ids` should be provided. 
`decoder_input_ids` should be + created outside of the model by shifting the `labels` to the right, replacing -100 by the `pad_token_id` + and prepending them with the `decoder_start_token_id`. + decoder_attention_mask (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + decoder_position_ids (`numpy.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.decoder.max_position_embeddings - 1]`. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. +""" + +SPEECH_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING = r""" + Args: + inputs (`jnp.ndarray` of shape `(batch_size, sequence_length)` or `(batch_size, sequence_length, feature_dim)`, *optional*): + Float values of input raw speech waveform or speech features. Values can be obtained by loading a *.flac* + or *.wav* audio file into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile + library (*pip install soundfile*). To prepare the array into *inputs*, either the [`Wav2Vec2Processor`] or + [`Speech2TextProcessor`] should be used for padding and conversion into a tensor of type + *torch.FloatTensor*. + attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. 
+ + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxBaseModelOutput`] instead of a plain tuple. +""" + +SPEECH_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING = r""" + Args: + decoder_input_ids (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see + `past_key_values`). + + For sequence to sequence training, `decoder_input_ids` should be provided. `decoder_input_ids` should be + created outside of the model by shifting the `labels` to the right, replacing -100 by the `pad_token_id` + and prepending them with the `decoder_start_token_id`. + encoder_outputs (`tuple(tuple(jnp.ndarray)`): + Tuple consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: `attentions`) + `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) is a sequence of + hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. + encoder_attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. 
Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + decoder_attention_mask (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + decoder_position_ids (`numpy.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.decoder.max_position_embeddings - 1]`. + past_key_values (`Dict[str, np.ndarray]`, *optional*, returned by `init_cache` or when passing previous `past_key_values`): + Dictionary of pre-computed hidden-states (key and values in the attention blocks) that can be used for fast + auto-regressive decoding. Pre-computed key and value hidden-states are of shape *[batch_size, max_length]*. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxCausalLMOutputWithCrossAttentions`] instead of a + plain tuple. +""" + +@flax.struct.dataclass +class FlaxBeamSearchOutput(ModelOutput): + """ + Flax Base class for outputs of decoder-only generation models using greedy search. + + + Args: + sequences (`jnp.ndarray` of shape `(batch_size, max_length)`): + The generated sequences. + scores (`jnp.ndarray` of shape `(batch_size,)`): + The scores (log probabilites) of the generated sequences. 
+ """ + + sequences: jnp.ndarray = None + scores: jnp.ndarray = None + + +@flax.struct.dataclass +class BeamSearchState: + cur_len: jnp.ndarray + running_sequences: jnp.ndarray + running_scores: jnp.ndarray + sequences: jnp.ndarray + scores: jnp.ndarray + is_sent_finished: jnp.ndarray + model_kwargs: Dict[str, jnp.ndarray] + + + + +class FlaxSpeechEncoderDecoderModule(nn.Module): + config: SpeechEncoderDecoderConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + encoder_config = self.config.encoder + decoder_config = self.config.decoder + + # TODO: configure FlaxAutoModel mappings (required when trialling different encoder-decoder combinations) + encoder_module = FlaxWav2Vec2Module + decoder_module = FlaxBartForCausalLMModule + + self.encoder = encoder_module(encoder_config, dtype=self.dtype) + self.decoder = decoder_module(decoder_config, dtype=self.dtype) + + # encoder outputs might need to be projected to different dimension for decoder + if ( + self.encoder.config.hidden_size != self.decoder.config.hidden_size + and self.decoder.config.cross_attention_hidden_size is None + ): + self.enc_to_dec_proj = nn.Dense( + self.decoder.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.decoder.config.initializer_range), + dtype=self.dtype, + ) + else: + self.enc_to_dec_proj = None + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.encoder.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for kernel_size, stride in zip(self.config.encoder.conv_kernel, self.config.encoder.conv_stride): + input_lengths = _conv_out_length(input_lengths, 
kernel_size, stride) + + if add_adapter: + for _ in range(self.config.encoder.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.encoder.adapter_stride) + + return input_lengths + + def _get_encoder_module(self): + return self.encoder + + def _get_projection_module(self): + return self.enc_to_dec_proj + + def _get_decoder_module(self): + return self.decoder + + def __call__( + self, + inputs, + attention_mask, + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + encoder_outputs=None, + extract_features=None, + output_attentions: bool = False, + output_hidden_states: bool = False, + output_features: bool = False, + return_dict: bool = True, + deterministic: bool = True, + freeze_feature_encoder: bool = False, + ): + if encoder_outputs is None: + encoder_outputs = self.encoder( + inputs, + attention_mask=attention_mask, + extract_features=extract_features, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=deterministic, + freeze_feature_encoder=freeze_feature_encoder, + ) + + if output_features: + return encoder_outputs + + encoder_hidden_states = encoder_outputs[0] + + # optionally project encoder_hidden_states + if self.enc_to_dec_proj is not None: + encoder_hidden_states = self.enc_to_dec_proj(encoder_hidden_states) + + # compute correct encoder attention mask + if attention_mask is not None: + encoder_attention_mask = self.encoder._get_feature_vector_attention_mask( + encoder_hidden_states.shape[1], attention_mask + ) + else: + encoder_attention_mask = None + + # flax script modeling_flax_wav2vec2.py + decoder_outputs = self.decoder( + input_ids=decoder_input_ids, + attention_mask=decoder_attention_mask, + position_ids=decoder_position_ids, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=deterministic, + ) + + if not return_dict: + return decoder_outputs + encoder_outputs + + return FlaxSeq2SeqLMOutput( + logits=decoder_outputs.logits, + decoder_hidden_states=decoder_outputs.hidden_states, + decoder_attentions=decoder_outputs.attentions, + cross_attentions=decoder_outputs.cross_attentions, + encoder_last_hidden_state=encoder_hidden_states, + encoder_hidden_states=encoder_outputs.hidden_states, + encoder_attentions=encoder_outputs.attentions, + ) + + +@add_start_docstrings(SPEECH_ENCODER_DECODER_START_DOCSTRING) +class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): + r""" + [`FlaxSpeechEncoderDecoderModel`] is a generic model class that will be instantiated as a transformer architecture + with the module (flax.nn.Module) of one of the base model classes of the library as encoder module and another one + as decoder module when created with the :meth*~transformers.FlaxAutoModel.from_pretrained* class method for the + encoder and :meth*~transformers.FlaxAutoModelForCausalLM.from_pretrained* class method for the decoder. + """ + + config_class = SpeechEncoderDecoderConfig + base_model_prefix: str = "speech_encoder_decoder" + module_class = FlaxSpeechEncoderDecoderModule + + def __init__( + self, + config: SpeechEncoderDecoderConfig, + input_shape: Optional[Tuple] = None, + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + _do_init: bool = True, + **kwargs + ): + + if not _do_init: + raise ValueError( + "`FlaxSpeechEncoderDecoderModel` cannot be created without initializing, `_do_init` must be `True`." 
+ ) + + if config.decoder.cross_attention_hidden_size is not None: + # Raise ValueError or option to project enc to dec hidden_size (eg EncAdapterLayer) + if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: + raise ValueError( + "If `cross_attention_hidden_size` is specified in the decoder's configuration, " + "it has to be equal to the encoder's `hidden_size`. " + f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " + f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + ) + + # make sure input & output embeddings are not tied + config.tie_word_embeddings = False + module = self.module_class(config=config, dtype=dtype, **kwargs) + + if input_shape is None: + # speech encoders almost always downsample the sequence length dimension + encoder_input_length = 1024 + decoder_input_length = module._get_feat_extract_output_lengths(encoder_input_length) + input_shape = ((1, encoder_input_length), (1, decoder_input_length)) + + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + encoder_input_shape, decoder_input_shape = input_shape + + # init input DeviceArrays + inputs = jnp.zeros(encoder_input_shape, dtype="f4") + attention_mask = jnp.ones_like(inputs, dtype="i4") + decoder_input_ids = jnp.zeros(decoder_input_shape, dtype="i4") + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + + batch_size, sequence_length = inputs.shape + + decoder_batch_size, decoder_sequence_length = decoder_input_ids.shape + if not decoder_batch_size == batch_size: + raise ValueError( + f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder and {decoder_batch_size} for decoder." 
+ ) + decoder_position_ids = jnp.broadcast_to( + jnp.arange(decoder_sequence_length)[None, :], (decoder_batch_size, decoder_sequence_length) + ) + + params_rng, dropout_rng = jax.random.split(rng) + rngs = {"params": params_rng, "dropout": dropout_rng} + + return self.module.init( + rngs, + inputs, + attention_mask, + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + )["params"] + + def init_cache(self, batch_size, max_length, encoder_outputs): + r""" + Args: + batch_size (`int`): + batch_size used for fast auto-regressive decoding. Defines the batch size of the initialized cache. + max_length (`int`): + maximum possible length for auto-regressive decoding. Defines the sequence length of the initialized + cache. + encoder_outputs (`Union[FlaxBaseModelOutput, tuple(tuple(jnp.ndarray)]`): + `encoder_outputs` consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: + `attentions`). `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) + is a sequence of hidden-states at the output of the last layer of the encoder. Used in the + cross-attention of the decoder. 
+ """ + # init input variables to retrieve cache + decoder_input_ids = jnp.ones((batch_size, max_length), dtype="i4") + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + decoder_position_ids = jnp.broadcast_to( + jnp.arange(jnp.atleast_2d(decoder_input_ids).shape[-1]), decoder_input_ids.shape + ) + + def _decoder_forward(module, decoder_input_ids, decoder_attention_mask, decoder_position_ids, **kwargs): + decoder_module = module._get_decoder_module() + return decoder_module( + input_ids=decoder_input_ids, + attention_mask=decoder_attention_mask, + position_ids=decoder_position_ids, + **kwargs, + ) + + init_variables = self.module.init( + jax.random.PRNGKey(0), + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + decoder_position_ids=decoder_position_ids, + encoder_hidden_states=encoder_outputs[0], + init_cache=True, + method=_decoder_forward, # we only need to call the decoder to init the cache + ) + return unfreeze(init_variables["cache"]) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + return self.module._get_feat_extract_output_lengths(input_lengths, add_adapter=add_adapter) + + @add_start_docstrings(SPEECH_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxBaseModelOutput, config_class=_CONFIG_FOR_DOC) + def encode( + self, + inputs: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + extract_features: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + freeze_feature_encoder: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 
and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + >>> encoder_outputs = model.encode(inputs) + ```""" + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if attention_mask is None: + attention_mask = jnp.ones_like(inputs, dtype="i4") + + if extract_features is not None: + extract_features = jnp.array(extract_features, dtype="f4") + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + def _encoder_forward(module, inputs, attention_mask, **kwargs): + encode_module = module._get_encoder_module() + return encode_module(inputs, attention_mask, **kwargs) + + outputs = self.module.apply( + {"params": params or self.params}, + inputs=jnp.array(inputs, dtype="f4"), + attention_mask=jnp.array(attention_mask, dtype="i4"), + extract_features=extract_features, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=not train, + freeze_feature_encoder=freeze_feature_encoder, + rngs=rngs, + method=_encoder_forward, + ) + + if return_dict and not output_features: + outputs = FlaxBaseModelOutput( + last_hidden_state=outputs.last_hidden_state, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + return outputs + + @add_start_docstrings(SPEECH_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxCausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) + def 
decode( + self, + decoder_input_ids, + encoder_outputs, + encoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_position_ids: Optional[jnp.ndarray] = None, + past_key_values: dict = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + >>> import jax.numpy as jnp + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + >>> encoder_outputs = model.encode(inputs) + + >>> decoder_start_token_id = model.config.decoder.bos_token_id + >>> decoder_input_ids = jnp.ones((inputs.shape[0], 1), dtype="i4") * decoder_start_token_id + + >>> outputs = model.decode(decoder_input_ids, encoder_outputs) + >>> logits = outputs.logits + ```""" + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + encoder_hidden_states = encoder_outputs[0] + if encoder_attention_mask is None: + batch_size, sequence_length = encoder_hidden_states.shape[:2] + encoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + batch_size, sequence_length = decoder_input_ids.shape + if decoder_attention_mask is None: + decoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + if decoder_position_ids is None: + if 
past_key_values is not None: + raise ValueError("Make sure to provide `decoder_position_ids` when passing `past_key_values`.") + + decoder_position_ids = jnp.broadcast_to( + jnp.arange(sequence_length)[None, :], (batch_size, sequence_length) + ) + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + params = {"params": params or self.params} + + # if past_key_values are passed then cache is already initialized a private flag init_cache has to be + # passed down to ensure cache is used. It has to be made sure that cache is marked as mutable so that + # it can be changed by FlaxBartAttention module + if past_key_values: + params["cache"] = past_key_values + mutable = ["cache"] + else: + mutable = False + + def _decoder_forward( + module, decoder_input_ids, decoder_attention_mask, decoder_position_ids, encoder_hidden_states, **kwargs + ): + + projection_module = module._get_projection_module() + decoder_module = module._get_decoder_module() + + # optionally project encoder_hidden_states + if projection_module is not None: + encoder_hidden_states = projection_module(encoder_hidden_states) + + return decoder_module( + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + encoder_hidden_states, + **kwargs, + ) + + outputs = self.module.apply( + params, + decoder_input_ids=jnp.array(decoder_input_ids, dtype="i4"), + decoder_attention_mask=jnp.array(decoder_attention_mask, dtype="i4"), + decoder_position_ids=jnp.array(decoder_position_ids, dtype="i4"), + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=jnp.array(encoder_attention_mask, dtype="i4"), + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=not train, + rngs=rngs, + mutable=mutable, + method=_decoder_forward, + ) + + # add updated cache to model output + if past_key_values is not None and return_dict: + outputs, past = outputs + 
outputs["past_key_values"] = unfreeze(past["cache"]) + return outputs + elif past_key_values is not None and not return_dict: + outputs, past = outputs + outputs = outputs[:1] + (unfreeze(past["cache"]),) + outputs[1:] + + return outputs + + @add_start_docstrings_to_model_forward(SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) + def __call__( + self, + inputs: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + extract_features: Optional[jnp.ndarray] = None, + decoder_input_ids: Optional[jnp.ndarray] = None, + decoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_position_ids: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + freeze_feature_encoder: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Examples: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel, BartTokenizer + + >>> # load a fine-tuned wav2vec2-2-bart model + >>> model = FlaxSpeechEncoderDecoderModel.from_pretrained("patrickvonplaten/wav2vec2-2-bart-large") + >>> # load output tokenizer + >>> tokenizer_output = BartTokenizer.from_pretrained("facebook/bart-large") + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + + >>> # use bart's special bos, pad and eos tokens + >>> model.config.decoder_start_token_id = model.decoder.config.bos_token_id + >>> model.config.pad_token_id = model.decoder.config.pad_token_id + >>> model.config.eos_token_id = model.decoder.config.eos_token_id + + >>> outputs = model.generate(inputs) + # Assert something? More interesting input? dtype correct? 
+ ``` + """ + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + # prepare encoder inputs + if attention_mask is None: + attention_mask = jnp.ones_like(inputs, dtype="i4") + + if extract_features is not None: + inputs = None # we can omit passing the inputs to the model to save memory + extract_features = jnp.array(extract_features, dtype="f4") + else: + inputs = jnp.array(inputs, dtype="f4") + + # prepare decoder inputs + if decoder_input_ids is None: + raise ValueError( + "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must be specified as an input argument." + ) + if decoder_attention_mask is None: + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + if decoder_position_ids is None: + batch_size, sequence_length = decoder_input_ids.shape + decoder_position_ids = jnp.broadcast_to( + jnp.arange(sequence_length)[None, :], (batch_size, sequence_length) + ) + + # Handle any PRNG if needed + rngs = {"dropout": dropout_rng} if dropout_rng is not None else {} + + return self.module.apply( + {"params": params or self.params}, + inputs=inputs, + attention_mask=jnp.array(attention_mask, dtype="i4"), + extract_features=extract_features, + decoder_input_ids=jnp.array(decoder_input_ids, dtype="i4"), + decoder_attention_mask=jnp.array(decoder_attention_mask, dtype="i4"), + decoder_position_ids=jnp.array(decoder_position_ids, dtype="i4"), + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=not train, + freeze_feature_encoder=freeze_feature_encoder, + rngs=rngs, + ) + + def prepare_inputs_for_generation( + self, + decoder_input_ids, + max_length, 
+ attention_mask: Optional[jnp.DeviceArray] = None, + decoder_attention_mask: Optional[jnp.DeviceArray] = None, + encoder_outputs=None, + **kwargs + ): + # initializing the cache + batch_size, seq_length = decoder_input_ids.shape + + past_key_values = self.init_cache(batch_size, max_length, encoder_outputs) + # Note that usually one would have to put 0's in the attention_mask for x > input.shape[-1] and x < cache_length. + # But since the decoder uses a causal mask, those positions are masked anyways. + # Thus we can create a single static attention_mask here, which is more efficient for compilation + extended_attention_mask = jnp.ones((batch_size, max_length), dtype="i4") + if decoder_attention_mask is not None: + decoder_position_ids = decoder_attention_mask.cumsum(axis=-1) - 1 + extended_attention_mask = lax.dynamic_update_slice(extended_attention_mask, decoder_attention_mask, (0, 0)) + else: + decoder_position_ids = jnp.broadcast_to( + jnp.arange(seq_length, dtype="i4")[None, :], (batch_size, seq_length) + ) + + return { + "past_key_values": past_key_values, + "encoder_outputs": encoder_outputs, + "encoder_attention_mask": attention_mask, + "decoder_attention_mask": extended_attention_mask, + "decoder_position_ids": decoder_position_ids, + } + + def update_inputs_for_generation(self, model_outputs, model_kwargs): + model_kwargs["past_key_values"] = model_outputs.past_key_values + model_kwargs["decoder_position_ids"] = model_kwargs["decoder_position_ids"][:, -1:] + 1 + return model_kwargs + + @classmethod + def from_encoder_decoder_pretrained( + cls, + encoder_pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None, + decoder_pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None, + *model_args, + **kwargs + ) -> FlaxPreTrainedModel: + r""" + Instantiate an encoder and a decoder from one or two base classes of the library from pretrained model + checkpoints. 
+ + Params: + encoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*): + Information necessary to initiate the encoder. Can be either: + + - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. + Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a + user or organization name, like `dbmdz/bert-base-german-cased`. + - A path to a *directory* containing model weights saved using + [`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`. + + decoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*, defaults to `None`): + Information necessary to initiate the decoder. Can be either: + + - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. + Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a + user or organization name, like `dbmdz/bert-base-german-cased`. + - A path to a *directory* containing model weights saved using + [`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`. + + model_args (remaining positional arguments, *optional*): + All remaning positional arguments will be passed to the underlying model's `__init__` method. + + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., + `output_attentions=True`). + + - To update the encoder configuration, use the prefix *encoder_* for each configuration parameter. + - To update the decoder configuration, use the prefix *decoder_* for each configuration parameter. + - To update the parent model configuration, do not use a prefix for each configuration parameter. + + Behaves differently depending on whether a `config` is provided or automatically loaded. 
+ + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + >>> # saving model after fine-tuning + >>> model.save_pretrained("./wav2vec2-2-bart-large") + >>> # load fine-tuned model + >>> model = FlaxSpeechEncoderDecoderModel.from_pretrained("./wav2vec2-2-bart-large") + ```""" + + kwargs_encoder = { + argument[len("encoder_") :]: value for argument, value in kwargs.items() if argument.startswith("encoder_") + } + + kwargs_decoder = { + argument[len("decoder_") :]: value for argument, value in kwargs.items() if argument.startswith("decoder_") + } + + # remove encoder, decoder kwargs from kwargs + for key in kwargs_encoder.keys(): + del kwargs["encoder_" + key] + for key in kwargs_decoder.keys(): + del kwargs["decoder_" + key] + + # Load and initialize the encoder and decoder + # The distinction between encoder and decoder at the model level is made + # by the value of the flag `is_decoder` that we need to set correctly. + encoder = kwargs_encoder.pop("model", None) + if encoder is None: + if encoder_pretrained_model_name_or_path is None: + raise ValueError( + "If `encoder_model` is not defined as an argument, a `encoder_pretrained_model_name_or_path` has " + "to be defined." + ) + + if "config" not in kwargs_encoder: + # TODO: AutoConfig .from_pretrained + encoder_config, kwargs_encoder = Wav2Vec2Config.from_pretrained( + encoder_pretrained_model_name_or_path, **kwargs_encoder, return_unused_kwargs=True + ) + if encoder_config.is_decoder is True or encoder_config.add_cross_attention is True: + logger.info( + f"Initializing {encoder_pretrained_model_name_or_path} as a encoder model " + "from a decoder model. 
Cross-attention and causal mask are disabled.
" + f"In order to initialize {decoder_pretrained_model_name_or_path} as a decoder, " + "make sure that the attributes `is_decoder` and `add_cross_attention` of `decoder_config` " + "passed to `.from_encoder_decoder_pretrained(...)` are set to `True` or do not pass a " + "`decoder_config` to `.from_encoder_decoder_pretrained(...)`" + ) + + # TODO: FlaxAutoModelForCausalLM .from_pretrained + decoder = FlaxBartForCausalLM.from_pretrained(decoder_pretrained_model_name_or_path, **kwargs_decoder) + + # instantiate config with corresponding kwargs + dtype = kwargs.pop("dtype", jnp.float32) + config = SpeechEncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) + + # make sure input & output word embeddings are not tied + config.tie_word_embeddings = False + + # init model + model = cls(config, dtype=dtype) + model.params["encoder"] = encoder.params + model.params["decoder"] = decoder.params + + return model + + def _beam_search( + self, + input_ids: None, + max_length: Optional[int] = None, + pad_token_id: Optional[int] = None, + eos_token_id: Optional[int] = None, + length_penalty: Optional[float] = None, + early_stopping: Optional[bool] = None, + logits_processor: Optional[FlaxLogitsProcessorList] = None, + trace: bool = True, + params: Optional[Dict[str, jnp.ndarray]] = None, + model_kwargs: Optional[Dict[str, jnp.ndarray]] = None, + ): + """ + This beam search function is heavily inspired by Flax's official example: + https://github.com/google/flax/blob/master/examples/wmt/train.py#L254 + """ + + def flatten_beam_dim(tensor): + """Flattens the first two dimensions of a non-scalar array.""" + # ignore scalars (e.g. 
cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + elif tensor.ndim == 6: + return tensor.reshape(tensor.shape[:1] + (tensor.shape[1] * tensor.shape[2],) + tensor.shape[3:]) + return tensor.reshape((tensor.shape[0] * tensor.shape[1],) + tensor.shape[2:]) + + def unflatten_beam_dim(tensor, batch_size, num_beams): + """Unflattens the first, flat batch*beam dimension of a non-scalar array.""" + # ignore scalars (e.g. cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + if tensor.ndim == 5: + return tensor.reshape(tensor.shape[:1] + (batch_size, num_beams) + tensor.shape[2:]) + return tensor.reshape((batch_size, num_beams) + tensor.shape[1:]) + + def gather_beams(nested, beam_indices, batch_size, new_num_beams): + """ + Gathers the beam slices indexed by beam_indices into new beam array. + """ + batch_indices = jnp.reshape( + jnp.arange(batch_size * new_num_beams) // new_num_beams, (batch_size, new_num_beams) + ) + + def gather_fn(tensor): + # ignore scalars (e.g. cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + if tensor.ndim == 6: + return tensor[:, batch_indices, beam_indices] + return tensor[batch_indices, beam_indices] + + return jax.tree_map(gather_fn, nested) + + # init values + max_length = max_length if max_length is not None else self.config.max_length + pad_token_id = pad_token_id if pad_token_id is not None else self.config.pad_token_id + eos_token_id = eos_token_id if eos_token_id is not None else self.config.eos_token_id + length_penalty = length_penalty if length_penalty is not None else self.config.length_penalty + early_stopping = early_stopping if early_stopping is not None else self.config.early_stopping + + batch_size, num_beams, cur_len = input_ids.shape + + eos_token_id = jnp.array(eos_token_id) + pad_token_id = jnp.array(pad_token_id) + cur_len = jnp.array(cur_len) + + # per batch,beam-item holding current token in loop. 
+ sequences = jnp.full((batch_size, num_beams, max_length), pad_token_id, dtype=jnp.int32) + running_sequences = jnp.full((batch_size, num_beams, max_length), pad_token_id, dtype=jnp.int32) + running_sequences = lax.dynamic_update_slice(sequences, input_ids, (0, 0, 0)) + + # per batch,beam-item state bit indicating if sentence has finished. + is_sent_finished = jnp.zeros((batch_size, num_beams), dtype=jnp.bool_) + + # per batch,beam-item score, logprobs + running_scores = jnp.tile(jnp.array([0.0] + [np.array(-1.0e7)] * (num_beams - 1)), [batch_size, 1]) + scores = jnp.ones((batch_size, num_beams)) * np.array(-1.0e7) + + # For Seq2Seq generation, we only need to use the decoder instead of the whole model in generation loop + # and pass it the `encoder_outputs`, which are part of the `model_kwargs`. + model = self.decode if self.config.is_encoder_decoder else self + + # flatten beam dim + if "encoder_outputs" in model_kwargs: + model_kwargs["encoder_outputs"]["last_hidden_state"] = flatten_beam_dim( + model_kwargs["encoder_outputs"]["last_hidden_state"] + ) + if "attention_mask" in model_kwargs: + model_kwargs["attention_mask"] = flatten_beam_dim(model_kwargs["attention_mask"]) + + # initialize model specific kwargs + model_kwargs = self.prepare_inputs_for_generation(flatten_beam_dim(input_ids), max_length, **model_kwargs) + + # initialize state + state = BeamSearchState( + cur_len=cur_len, + running_sequences=running_sequences, + running_scores=running_scores, + sequences=sequences, + scores=scores, + is_sent_finished=is_sent_finished, + model_kwargs=model_kwargs, + ) + + def beam_search_cond_fn(state): + """beam search state termination condition fn.""" + + # 1. is less than max length? + not_max_length_yet = state.cur_len < max_length + + # 2. can the new beams still improve? 
+ best_running_score = state.running_scores[:, -1:] / (max_length**length_penalty) + worst_finished_score = jnp.where( + state.is_sent_finished, jnp.min(state.scores, axis=1, keepdims=True), np.array(-1.0e7) + ) + improvement_still_possible = jnp.all(worst_finished_score < best_running_score) + + # 3. is there still a beam that has not finished? + still_open_beam = ~(jnp.all(state.is_sent_finished) & early_stopping) + + return not_max_length_yet & still_open_beam & improvement_still_possible + + def beam_search_body_fn(state, input_ids_length=1): + """beam search state update fn.""" + # 1. Forward current tokens + # Collect the current position slice along length to feed the fast + # autoregressive decoder model. Flatten the beam dimension into batch + # dimension for feeding into the model. + # unflatten beam dimension + # Unflatten beam dimension in attention cache arrays + input_token = flatten_beam_dim( + lax.dynamic_slice( + state.running_sequences, + (0, 0, state.cur_len - input_ids_length), + (batch_size, num_beams, input_ids_length), + ) + ) + model_outputs = model(input_token, params=params, **state.model_kwargs) + + logits = unflatten_beam_dim(model_outputs.logits[:, -1], batch_size, num_beams) + cache = jax.tree_map( + lambda tensor: unflatten_beam_dim(tensor, batch_size, num_beams), model_outputs.past_key_values + ) + + # adapt logits for FlaxMarianMTModel + logits = self._adapt_logits_for_beam_search(logits) + + # 2. Compute log probs + # get log probabilities from logits, + # process logits with processors (*e.g.* min_length, ...), and + # add new logprobs to existing running logprobs scores. 
+ log_probs = jax.nn.log_softmax(logits) + log_probs = logits_processor( + flatten_beam_dim(running_sequences), flatten_beam_dim(log_probs), state.cur_len + ) + log_probs = unflatten_beam_dim(log_probs, batch_size, num_beams) + log_probs = log_probs + jnp.expand_dims(state.running_scores, axis=2) + vocab_size = log_probs.shape[2] + log_probs = log_probs.reshape((batch_size, num_beams * vocab_size)) + + # 3. Retrieve top-K + # Each item in batch has num_beams * vocab_size candidate sequences. + # For each item, get the top 2*k candidates with the highest log- + # probabilities. We gather the top 2*K beams here so that even if the best + # K sequences reach EOS simultaneously, we have another K sequences + # remaining to continue the live beam search. + # Gather the top 2*K scores from _all_ beams. + # Gather 2*k top beams. + # Recover the beam index by floor division. + # Recover token id by modulo division and expand Id array for broadcasting. + # Update sequences for the 2*K top-k new sequences. + beams_to_keep = 2 * num_beams + topk_log_probs, topk_indices = lax.top_k(log_probs, k=beams_to_keep) + topk_beam_indices = topk_indices // vocab_size + topk_running_sequences = gather_beams( + state.running_sequences, topk_beam_indices, batch_size, beams_to_keep + ) + topk_ids = jnp.expand_dims(topk_indices % vocab_size, axis=2) + topk_sequences = lax.dynamic_update_slice(topk_running_sequences, topk_ids, (0, 0, state.cur_len)) + + # 4. Check which sequences have ended + # Update current sequences: + # Did any of these sequences reach an end marker? + # To prevent these just finished sequences from being added to the current sequences + # set of active beam search sequences, set their log probs to a very large + # negative value. + did_topk_just_finished = topk_sequences[:, :, state.cur_len] == eos_token_id + running_topk_log_probs = topk_log_probs + did_topk_just_finished * np.array(-1.0e7) + # 5. 
Get running sequences scores for next + # Determine the top k beam indices (from top 2*k beams) from log probs + # and gather top k beams (from top 2*k beams). + next_topk_indices = jnp.flip(lax.top_k(running_topk_log_probs, k=num_beams)[1], axis=1) + next_running_sequences, next_running_scores = gather_beams( + [topk_sequences, running_topk_log_probs], next_topk_indices, batch_size, num_beams + ) + + # 6. Process topk logits + # Further process log probs: + # - add length penalty + # - make sure no scores can be added anymore if beam is full + # - make sure still running sequences cannot be chosen as finalized beam + topk_log_probs = topk_log_probs / (state.cur_len**length_penalty) + beams_in_batch_are_full = ( + jnp.broadcast_to(state.is_sent_finished.all(axis=-1, keepdims=True), did_topk_just_finished.shape) + & early_stopping + ) + add_penalty = ~did_topk_just_finished | beams_in_batch_are_full + topk_log_probs += add_penalty * np.array(-1.0e7) + + # 7. Get scores, sequences, is sentence finished for next. + # Combine sequences, scores, and flags along the beam dimension and compare + # new finished sequence scores to existing finished scores and select the + # best from the new set of beams + merged_sequences = jnp.concatenate([state.sequences, topk_sequences], axis=1) + merged_scores = jnp.concatenate([state.scores, topk_log_probs], axis=1) + merged_is_sent_finished = jnp.concatenate([state.is_sent_finished, did_topk_just_finished], axis=1) + topk_merged_indices = jnp.flip(lax.top_k(merged_scores, k=num_beams)[1], axis=1) + next_sequences, next_scores, next_is_sent_finished = gather_beams( + [merged_sequences, merged_scores, merged_is_sent_finished], topk_merged_indices, batch_size, num_beams + ) + + # 8. Update model kwargs. + # Determine the top k beam indices from the original set of all beams. + # With these, gather the top k beam-associated caches. 
+ next_running_indices = gather_beams(topk_beam_indices, next_topk_indices, batch_size, num_beams) + next_cache = gather_beams(cache, next_running_indices, batch_size, num_beams) + model_outputs["past_key_values"] = jax.tree_map(lambda x: flatten_beam_dim(x), next_cache) + next_model_kwargs = self.update_inputs_for_generation(model_outputs, state.model_kwargs) + + return BeamSearchState( + cur_len=state.cur_len + 1, + running_scores=next_running_scores, + running_sequences=next_running_sequences, + scores=next_scores, + sequences=next_sequences, + is_sent_finished=next_is_sent_finished, + model_kwargs=next_model_kwargs, + ) + + # The very first prompt often has sequence length > 1, so run outside of `lax.while_loop` to comply with TPU + if input_ids.shape[-1] > 1: + state = partial(beam_search_body_fn, input_ids_length=input_ids.shape[-1])(state) + + if not trace: + state = self._run_loop_in_debug(beam_search_cond_fn, beam_search_body_fn, state) + else: + state = lax.while_loop(beam_search_cond_fn, beam_search_body_fn, state) + + # Account for the edge-case where there are no finished sequences for a + # particular batch item. If so, return running sequences for that batch item. + none_finished = jnp.any(state.is_sent_finished, axis=1) + sequences = jnp.where(none_finished[:, None, None], state.sequences, state.running_sequences) + scores = jnp.where(none_finished[:, None], state.scores, state.running_scores) + + # return all beams for each batch and the best score + sequences = sequences[:, :] + scores = scores[:, -1] + + return FlaxBeamSearchOutput(sequences=sequences, scores=scores) diff --git a/models/modeling_flax_wav2vec2.py b/models/modeling_flax_wav2vec2.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf1a50af1dc5dce32577b8f8d61806afafde117 --- /dev/null +++ b/models/modeling_flax_wav2vec2.py @@ -0,0 +1,975 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and the HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Flax Wav2Vec2 model.""" + +from functools import partial +from typing import Optional, Tuple, Union + +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict +from flax.linen import partitioning as nn_partitioning +from flax.linen.attention import dot_product_attention_weights +from jax import lax + +from transformers.modeling_flax_outputs import FlaxBaseModelOutput, FlaxCausalLMOutput +from transformers.modeling_flax_utils import ACT2FN, FlaxPreTrainedModel +from transformers.utils import ModelOutput + +from models import Wav2Vec2Config + +scan_with_axes = nn_partitioning.scan_with_axes +remat = nn_partitioning.remat + + +@flax.struct.dataclass +class FlaxWav2Vec2BaseModelOutput(ModelOutput): + """ + Output type of [`FlaxWav2Vec2BaseModelOutput`], with potential hidden states and attentions. + + Args: + last_hidden_state (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + extract_features (`jnp.ndarray` of shape `(batch_size, sequence_length, last_conv_dim)`): + Sequence of extracted feature vectors of the last convolutional layer of the model with `last_conv_dim` + being the dimension of the last convolutional layer. 
+ hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `jnp.ndarray` (one for the output of the embeddings + one for the output of each layer) of shape + `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + last_hidden_state: jnp.ndarray = None + extract_features: jnp.ndarray = None + hidden_states: Optional[Tuple[jnp.ndarray]] = None + attentions: Optional[Tuple[jnp.ndarray]] = None + + +WAV_2_VEC_2_START_DOCSTRING = r""" + Wav2Vec2 was proposed in [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech + Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael + Auli. + + This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a Flax Linen + [flax.nn.Module](https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html) subclass. Use it as a + regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. 
+ + Finally, this model supports inherent JAX features such as: + + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + config ([`Wav2Vec2Config`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights. + dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`): + The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and + `jax.numpy.bfloat16` (on TPUs). + + This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If + specified all the computation will be performed with the given `dtype`. + + **Note that this only specifies the dtype of the computation and does not influence the dtype of model + parameters.** + + If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and + [`~FlaxPreTrainedModel.to_bf16`]. +""" + + +WAV_2_VEC_2_INPUTS_DOCSTRING = r""" + Args: + input_values (`jnp.ndarray` of shape `(batch_size, sequence_length)`): + Float values of input raw speech waveform. Values can be obtained by loading a *.flac* or *.wav* audio file + into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile library (*pip install + soundfile*). To prepare the array into *input_values*, the [`Wav2Vec2Processor`] should be used for padding + and conversion into a tensor of type *jnp.ndarray*. See [`Wav2Vec2Processor.__call__`] for details. 
+ attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing convolution and attention on padding token indices. Mask values selected in `[0, + 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) .. warning:: `attention_mask` should only be passed + if the corresponding processor has `config.return_attention_mask == True`. For all models whose processor + has `config.return_attention_mask == False`, such as + [wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base-960h), `attention_mask` should **not** be + passed to avoid degraded performance when doing batched inference. For such models `input_values` should + simply be padded with 0 and passed without `attention_mask`. Be aware that these models also yield slightly + different results depending on whether `input_values` is padded or not. + mask_time_indices (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict + masked extracted features in *config.proj_codevector_dim* space. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
+""" + + +class FlaxWav2Vec2LayerNormConvLayer(nn.Module): + config: Wav2Vec2Config + layer_id: int = 0 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.in_conv_dim = self.config.conv_dim[self.layer_id] if self.layer_id > 0 else 1 + self.out_conv_dim = self.config.conv_dim[self.layer_id] + + self.conv = nn.Conv( + features=self.config.conv_dim[self.layer_id], + kernel_size=(self.config.conv_kernel[self.layer_id],), + strides=(self.config.conv_stride[self.layer_id],), + use_bias=self.config.conv_bias, + kernel_init=jax.nn.initializers.he_normal(), + padding="VALID", + dtype=self.dtype, + ) + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.activation = ACT2FN[self.config.feat_extract_activation] + + def __call__(self, hidden_states): + hidden_states = self.conv(hidden_states) + hidden_states = self.layer_norm(hidden_states) + hidden_states = self.activation(hidden_states) + return hidden_states + + +class FlaxConvWithWeightNorm(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = nn.Conv( + features=self.config.hidden_size, + kernel_size=(self.config.num_conv_pos_embeddings,), + kernel_init=jax.nn.initializers.he_normal(), + padding="VALID", + feature_group_count=self.config.num_conv_pos_embedding_groups, + dtype=self.dtype, + ) + weight_shape = ( + self.conv.features, + self.conv.features // self.conv.feature_group_count, + self.conv.kernel_size[0], + ) + self.weight_v = self.param("weight_v", jax.nn.initializers.he_normal(), weight_shape) + self.weight_g = self.param("weight_g", lambda _: jnp.linalg.norm(self.weight_v, axis=(0, 1))[None, None, :]) + self.bias = self.param("bias", jax.nn.initializers.zeros, (self.conv.features,)) + self.prev_padding = self.conv.kernel_size[0] // 2 + + def _get_normed_weights(self): + weight_v_norm = jnp.linalg.norm(self.weight_v, axis=(0, 1))[None, None, :] + normed_weight_v = jnp.divide(self.weight_v, weight_v_norm) + 
normed_kernel = jnp.multiply(normed_weight_v, self.weight_g) + return normed_kernel + + def __call__(self, hidden_states): + kernel = self._get_normed_weights() + hidden_states = jnp.pad(hidden_states, ((0, 0), (self.prev_padding, self.prev_padding), (0, 0))) + hidden_states = self.conv.apply({"params": {"kernel": kernel.T, "bias": self.bias}}, hidden_states) + return hidden_states + + +class FlaxWav2Vec2PositionalConvEmbedding(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = FlaxConvWithWeightNorm(self.config, dtype=self.dtype) + self.activation = ACT2FN[self.config.feat_extract_activation] + self.num_pad_remove = 1 if self.config.num_conv_pos_embeddings % 2 == 0 else 0 + + def __call__(self, hidden_states): + hidden_states = hidden_states.transpose((0, 1, 2)) + + hidden_states = self.conv(hidden_states) + + if self.num_pad_remove > 0: + hidden_states = hidden_states[:, : -self.num_pad_remove, :] + hidden_states = self.activation(hidden_states) + + hidden_states = hidden_states.transpose((0, 1, 2)) + return hidden_states + + +class FlaxConvLayersCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + if self.config.feat_extract_norm == "layer": + # note that we can't use scan on the conv layers as they differ on a layer-by-layer basis + BlockLayer = remat(FlaxWav2Vec2LayerNormConvLayer) if self.config.gradient_checkpointing else FlaxWav2Vec2LayerNormConvLayer + self.layers = [ + BlockLayer(self.config, layer_id=i, name=str(i), dtype=self.dtype) + for i in range(self.config.num_feat_extract_layers) + ] + elif self.config.feat_extract_norm == "group": + raise NotImplementedError("At the moment only ``config.feat_extact_norm == 'layer'`` is supported") + else: + raise ValueError( + f"`config.feat_extract_norm` is {self.config.feat_extract_norm}, but has to be one of ['group', 'layer']" + ) + + def __call__(self, hidden_states): + for i, conv_layer in 
enumerate(self.layers): + hidden_states = conv_layer(hidden_states) + return hidden_states + + +class FlaxWav2Vec2FeatureEncoder(nn.Module): + """Construct the features from raw audio waveform""" + + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv_layers = FlaxConvLayersCollection(self.config, dtype=self.dtype) + + def __call__(self, input_values, freeze_feature_encoder=False): + hidden_states = input_values[:, :, None] + hidden_states = self.conv_layers(hidden_states) + if freeze_feature_encoder: + hidden_states = jax.lax.stop_gradient(hidden_states) + return hidden_states + + +class FlaxWav2Vec2FeatureProjection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.projection = nn.Dense( + self.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.dropout = nn.Dropout(rate=self.config.feat_proj_dropout) + + def __call__(self, hidden_states, deterministic=True): + norm_hidden_states = self.layer_norm(hidden_states) + hidden_states = self.projection(norm_hidden_states) + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + return hidden_states, norm_hidden_states + + +class FlaxWav2Vec2Attention(nn.Module): + config: Wav2Vec2Config + embed_dim: int + num_heads: int + dropout: float = 0.0 + bias: bool = True + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self) -> None: + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." 
+ ) + + dense = partial( + nn.Dense, + self.embed_dim, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + ) + + self.q_proj, self.k_proj, self.v_proj = dense(), dense(), dense() + + self.fused_proj = nn.Dense( + self.embed_dim * 3, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + ) + + self.out_proj = dense() + + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def _split_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.num_heads, self.head_dim)) + + def _merge_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,)) + + def __call__( + self, + hidden_states: jnp.ndarray, + key_value_states: Optional[jnp.ndarray] = None, + attention_mask: Optional[jnp.ndarray] = None, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + """Input shape: Batch x Time x Channel""" + + if self.config.fuse_matmuls: + attention_states = self.fused_proj(hidden_states) + query_states, key_states, value_states = jnp.split(attention_states, 3, axis=-1) + + else: + # get query proj + query_states = self.q_proj(hidden_states) + + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = self._split_heads(query_states) + key_states = self._split_heads(key_states) + value_states = self._split_heads(value_states) + + if attention_mask is not None: + attention_mask = jnp.expand_dims(attention_mask, axis=(-3, -2)) + + # Convert the boolean attention mask to an attention bias. 
+ if attention_mask is not None: + # attention mask in the form of attention bias + attention_bias = lax.select( + attention_mask > 0, + jnp.full(attention_mask.shape, 0.0).astype(self.dtype), + jnp.full(attention_mask.shape, float("-inf")).astype(self.dtype), + ) + else: + attention_bias = None + + dropout_rng = None + if not deterministic and self.dropout > 0.0: + dropout_rng = self.make_rng("dropout") + + attn_weights = dot_product_attention_weights( + query_states, + key_states, + bias=attention_bias, + dropout_rng=dropout_rng, + dropout_rate=self.dropout, + broadcast_dropout=True, + deterministic=deterministic, + dtype=self.dtype, + precision=None, + ) + + attn_output = jnp.einsum("...hqk,...khd->...qhd", attn_weights, value_states) + attn_output = self._merge_heads(attn_output) + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + + +class FlaxWav2Vec2FeedForward(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.intermediate_dropout = nn.Dropout(rate=self.config.activation_dropout) + + self.intermediate_dense = nn.Dense( + self.config.intermediate_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + if isinstance(self.config.hidden_act, str): + self.intermediate_act_fn = ACT2FN[self.config.hidden_act] + else: + self.intermediate_act_fn = self.config.hidden_act + + self.output_dense = nn.Dense( + self.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.output_dropout = nn.Dropout(rate=self.config.hidden_dropout) + + def __call__(self, hidden_states, deterministic=True): + hidden_states = self.intermediate_dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + hidden_states = self.intermediate_dropout(hidden_states, deterministic=deterministic) + + hidden_states = self.output_dense(hidden_states) + hidden_states = 
self.output_dropout(hidden_states, deterministic=deterministic) + return hidden_states + + +class FlaxWav2Vec2EncoderLayerStableLayerNorm(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.attention = FlaxWav2Vec2Attention( + config=self.config, + embed_dim=self.config.hidden_size, + num_heads=self.config.num_attention_heads, + dropout=self.config.attention_dropout, + dtype=self.dtype, + ) + self.dropout = nn.Dropout(rate=self.config.hidden_dropout) + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.feed_forward = FlaxWav2Vec2FeedForward(self.config, dtype=self.dtype) + self.final_layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + + def __call__(self, hidden_states, attention_mask=None, deterministic=True, output_attentions=False): + if self.config.use_scan: + hidden_states = hidden_states[0] + attn_residual = hidden_states + hidden_states = self.layer_norm(hidden_states) + hidden_states, attn_weights = self.attention( + hidden_states, attention_mask=attention_mask, deterministic=deterministic + ) + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + hidden_states = attn_residual + hidden_states + hidden_states = hidden_states + self.feed_forward( + self.final_layer_norm(hidden_states), deterministic=deterministic + ) + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + if self.config.use_scan: + outputs = (outputs, None) + + return outputs + + +class FlaxWav2Vec2EncoderLayerStableLayerNormCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + @nn.compact + def __call__( + self, + hidden_states, + attention_mask=None, + deterministic: bool = True, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + ): + all_attentions = () if output_attentions else None + all_hidden_states = () if output_hidden_states else 
None + + num_layers = self.config.num_hidden_layers + BlockEncoderLayer = ( + remat( + FlaxWav2Vec2EncoderLayerStableLayerNorm, + static_argnums=(2, 3), + prevent_cse=not self.config.use_scan, + ) + if self.config.gradient_checkpointing + else FlaxWav2Vec2EncoderLayerStableLayerNorm + ) + + if self.config.use_scan: + # since all decoder layers are the same, we use nn.scan directly + assert not output_attentions, "cannot use `scan` with `output_attentions` set to `True`" + assert not output_hidden_states, "cannot use `scan` with `output_hidden_states` set to `True`" + hidden_states = (hidden_states,) + + hidden_states, _ = scan_with_axes( + BlockEncoderLayer, + variable_axes={"params": 0, "cache": 0}, + split_rngs={"params": True, "dropout": True}, + in_axes=(nn.broadcast, nn.broadcast, nn.broadcast), + length=num_layers, + )(self.config, dtype=self.dtype, name="FlaxWav2Vec2EncoderLayers",)( + hidden_states, attention_mask, deterministic, output_attentions + ) + hidden_states = hidden_states[0] + + else: + for layer in range(num_layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + layer_outputs = BlockEncoderLayer( + self.config, + dtype=self.dtype, + name=str(layer), + )(hidden_states, attention_mask, deterministic, output_attentions) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions += (layer_outputs[1],) + + if output_hidden_states: + all_hidden_states += (hidden_states,) + + outputs = (hidden_states, all_hidden_states, all_attentions) + + if not return_dict: + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutput( + last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions + ) + + +class FlaxWav2Vec2StableLayerNormEncoder(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.pos_conv_embed = FlaxWav2Vec2PositionalConvEmbedding(self.config, dtype=self.dtype) + self.layer_norm = 
nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.dropout = nn.Dropout(rate=self.config.hidden_dropout) + self.layers = FlaxWav2Vec2EncoderLayerStableLayerNormCollection(self.config, dtype=self.dtype) + + def __call__( + self, + hidden_states, + attention_mask=None, + deterministic=True, + output_attentions=False, + output_hidden_states=False, + return_dict=True, + ): + + if attention_mask is not None: + # make sure padded tokens are not attended to + hidden_states = jnp.where( + jnp.broadcast_to(attention_mask[:, :, None], hidden_states.shape), hidden_states, 0 + ) + + position_embeddings = self.pos_conv_embed(hidden_states) + + hidden_states = hidden_states + position_embeddings + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + + outputs = self.layers( + hidden_states, + attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + last_hidden_state = self.layer_norm(outputs[0]) + + # update the last element in `hidden_states` after applying `layernorm` above + hidden_states = None + if output_hidden_states: + hidden_states = outputs[1] + hidden_states = hidden_states[:-1] + (last_hidden_state,) + + if not return_dict: + outputs = (last_hidden_state, hidden_states) + (outputs[2:] if output_hidden_states else outputs[1:]) + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutput( + last_hidden_state=last_hidden_state, hidden_states=hidden_states, attentions=outputs.attentions + ) + + +class FlaxWav2Vec2Adapter(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + # hidden_states require down-projection if feature dims don't match + if self.config.output_hidden_size != self.config.hidden_size: + self.proj = nn.Dense( + self.config.output_hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.proj_layer_norm = 
nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + else: + self.proj = self.proj_layer_norm = None + + self.layers = FlaxWav2Vec2AdapterLayersCollection(self.config, dtype=self.dtype) + + def __call__(self, hidden_states, deterministic=True): + # down-project hidden_states if required + if self.proj is not None and self.proj_layer_norm is not None: + hidden_states = self.proj(hidden_states) + hidden_states = self.proj_layer_norm(hidden_states) + + hidden_states = self.layers(hidden_states) + + return hidden_states + + +class FlaxWav2Vec2AdapterLayer(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = nn.Conv( + features=2 * self.config.output_hidden_size, + kernel_size=(self.config.adapter_kernel_size,), + strides=(self.config.adapter_stride,), + padding=((1, 1),), + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + hidden_states = self.conv(hidden_states) + hidden_states = nn.glu(hidden_states, axis=2) + + return hidden_states + + +class FlaxWav2Vec2AdapterLayersCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + BlockAdapterLayer = remat(FlaxWav2Vec2AdapterLayer) if self.config.gradient_checkpointing else FlaxWav2Vec2AdapterLayer + self.layers = [ + BlockAdapterLayer(self.config, name=str(i), dtype=self.dtype) + for i in range(self.config.num_adapter_layers) + ] + + def __call__(self, hidden_states): + for conv_layer in self.layers: + hidden_states = conv_layer(hidden_states) + + return hidden_states + + +class FlaxWav2Vec2PreTrainedModel(FlaxPreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. 
+ """ + + config_class = Wav2Vec2Config + base_model_prefix: str = "wav2vec2" + main_input_name = "input_values" + module_class: nn.Module = None + + def __init__( + self, + config: Wav2Vec2Config, + input_shape: Tuple = (1, 1024), + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + _do_init: bool = True, + **kwargs, + ): + module = self.module_class(config=config, dtype=dtype, **kwargs) + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + # init input tensors + input_values = jnp.zeros(input_shape, dtype="i4") + attention_mask = jnp.ones_like(input_values) + params_rng, dropout_rng = jax.random.split(rng, 2) + rngs = {"params": params_rng, "dropout": dropout_rng} + + return self.module.init(rngs, input_values, attention_mask, return_dict=False)["params"] + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + params: dict = None, + dropout_rng: jax.random.PRNGKey = None, + train: bool = False, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + freeze_feature_encoder: bool = False, + return_dict: Optional[bool] = None, + ): + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if attention_mask is None: + batch_size, sequence_length = input_values.shape + attention_mask = jnp.ones((batch_size, sequence_length)) + + if extract_features is not None: + extract_features = jnp.array(extract_features, dtype="f4") + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + 
inputs = {"params": params or self.params} + + return self.module.apply( + inputs, + jnp.array(input_values, dtype="f4"), + jnp.array(attention_mask, dtype="i4"), + mask_time_indices, + extract_features, + not train, + output_attentions, + output_hidden_states, + output_features, + freeze_feature_encoder, + return_dict, + rngs=rngs, + ) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + return self.module._get_feat_extract_output_lengths(input_lengths, add_adapter=add_adapter) + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + return self.module._get_feature_vector_attention_mask(feature_vector_length, attention_mask, add_adapter=add_adapter) + + +class FlaxWav2Vec2Module(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.feature_extractor = FlaxWav2Vec2FeatureEncoder(self.config, dtype=self.dtype) + self.feature_projection = FlaxWav2Vec2FeatureProjection(self.config, dtype=self.dtype) + self.masked_spec_embed = self.param( + "masked_spec_embed", jax.nn.initializers.uniform(), (self.config.hidden_size,) + ) + + if self.config.do_stable_layer_norm: + self.encoder = FlaxWav2Vec2StableLayerNormEncoder(self.config, dtype=self.dtype) + else: + raise NotImplementedError("``config.do_stable_layer_norm is False`` is currently not supported.") + + self.adapter = FlaxWav2Vec2Adapter(self.config, dtype=self.dtype) if self.config.add_adapter else None + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + deterministic=True, + output_attentions=None, + output_hidden_states=None, + output_features=False, + freeze_feature_encoder=False, + return_dict=None, + ): + + # forward pass through the feature extractor if features not specified + if extract_features is None: + extract_features = 
self.feature_extractor(input_values, freeze_feature_encoder=freeze_feature_encoder) + + if output_features: + return extract_features + + # make sure that no loss is computed on padded inputs + if attention_mask is not None: + # compute reduced attention_mask corresponding to feature vectors + attention_mask = self._get_feature_vector_attention_mask( + extract_features.shape[1], attention_mask, add_adapter=False + ) + + hidden_states, extract_features = self.feature_projection(extract_features, deterministic=deterministic) + if mask_time_indices is not None: # apply SpecAugment along time axis with given indices + hidden_states = jnp.where( + jnp.broadcast_to(mask_time_indices[:, :, None], hidden_states.shape), + jnp.broadcast_to(self.masked_spec_embed[None, None, :], hidden_states.shape), + hidden_states, + ) + + encoder_outputs = self.encoder( + hidden_states, + attention_mask=attention_mask, + deterministic=deterministic, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + hidden_states = encoder_outputs[0] + + if self.adapter is not None: + hidden_states = self.adapter(hidden_states) + + if not return_dict: + return (hidden_states, extract_features) + encoder_outputs[1:] + + return FlaxWav2Vec2BaseModelOutput( + last_hidden_state=hidden_states, + extract_features=extract_features, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for 
kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride): + input_lengths = _conv_out_length(input_lengths, kernel_size, stride) + + if add_adapter: + for _ in range(self.config.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride) + + return input_lengths + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + # Effectively attention_mask.sum(-1), but not inplace to be able to run + # on inference mode. + non_padded_lengths = attention_mask.cumsum(axis=-1)[:, -1] + + output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter) + + batch_size = attention_mask.shape[0] + + attention_mask = jnp.zeros((batch_size, feature_vector_length), dtype=attention_mask.dtype) + # these two operations makes sure that all values + # before the output lengths indices are attended to + attention_mask = attention_mask.at[jnp.arange(attention_mask.shape[0]), output_lengths - 1].set(1) + attention_mask = jnp.flip(jnp.flip(attention_mask, -1).cumsum(-1), -1).astype("bool") + return attention_mask + + +class FlaxWav2Vec2Model(FlaxWav2Vec2PreTrainedModel): + module_class = FlaxWav2Vec2Module + + +class FlaxWav2Vec2ForCTCModule(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.wav2vec2 = FlaxWav2Vec2Module(self.config, dtype=self.dtype) + self.dropout = nn.Dropout(rate=self.config.final_dropout) + self.lm_head = nn.Dense( + self.config.vocab_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + deterministic=True, + output_attentions=None, + output_hidden_states=None, + output_features=False, + freeze_feature_encoder=False, + return_dict=None, + ): + outputs = self.wav2vec2( + input_values, + 
attention_mask=attention_mask, + mask_time_indices=mask_time_indices, + deterministic=deterministic, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + freeze_feature_encoder=freeze_feature_encoder, + return_dict=return_dict, + ) + + hidden_states = outputs[0] + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + + logits = self.lm_head(hidden_states) + + if not return_dict: + return (logits,) + outputs[2:] + + return FlaxCausalLMOutput(logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride): + input_lengths = _conv_out_length(input_lengths, kernel_size, stride) + + if add_adapter: + for _ in range(self.config.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride) + + return input_lengths + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + # Effectively attention_mask.sum(-1), but not inplace to be able to run + # on inference mode. 
+ non_padded_lengths = attention_mask.cumsum(axis=-1)[:, -1] + + output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter) + + batch_size = attention_mask.shape[0] + + attention_mask = jnp.zeros((batch_size, feature_vector_length), dtype=attention_mask.dtype) + # these two operations makes sure that all values + # before the output lengths indices are attended to + attention_mask = attention_mask.at[jnp.arange(attention_mask.shape[0]), output_lengths - 1].set(1) + attention_mask = jnp.flip(jnp.flip(attention_mask, -1).cumsum(-1), -1).astype("bool") + return attention_mask + + +class FlaxWav2Vec2ForCTC(FlaxWav2Vec2PreTrainedModel): + module_class = FlaxWav2Vec2ForCTCModule diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run.sh b/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..a4daeba0379dc91cc89224bd2b962066af12bdd3 --- /dev/null +++ b/run.sh @@ -0,0 +1,48 @@ +WANDB_ENTITY=NbAiLab WANDB_PROJECT=wav2vec2 python run_flax_speech_recognition_ctc.py \ + --model_name_or_path="facebook/wav2vec2-xls-r-1b" \ + --hub_model_id="NbAiLab/wav2vec2-1b-npsc-nst" \ + --tokenizer_name="./" \ + --output_dir="./" \ + --overwrite_output_dir \ + --num_train_epochs="40" \ + --per_device_train_batch_size="12" \ + --per_device_eval_batch_size="12" \ + --gradient_accumulation_steps="1" \ + --precision="full_mixed" \ + --matmul_precision="bfloat16" \ + --learning_rate="1e-4" \ + --warmup_steps="4000" \ + --length_column_name="input_length" \ + --evaluation_strategy="steps" \ + --text_column_name="text" \ + 
--save_steps="1000" \ + --eval_steps="1000" \ + --logging_steps="100" \ + --layerdrop="0.041" \ + --attention_dropout="0.094" \ + --activation_dropout="0.055" \ + --hidden_dropout="0.047" \ + --save_total_limit="5" \ + --freeze_feature_encoder \ + --feat_proj_dropout="0.04" \ + --mask_time_prob="0.082" \ + --mask_time_length="10" \ + --mask_feature_prob="0.25" \ + --mask_feature_length="64" \ + --gradient_checkpointing \ + --min_duration_in_seconds="0.5" \ + --max_duration_in_seconds="20.0" \ + --use_auth_token \ + --seed="42" \ + --group_by_length \ + --do_train --do_eval \ + --push_to_hub \ + --preprocessing_num_workers="32" \ + --ctc_zero_infinity \ + --do_lower_case \ + --wandb_project="wav2vec2" \ + --wandb_name="wav2vec2-1b-npsc-nst" \ + --remove_punctuation + + +# --fp16 diff --git a/run_flax_speech_recognition_ctc.py b/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..11df8fb90ea54a20f8f34bbb40442193e151ddc2 --- /dev/null +++ b/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1604 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC + nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed) + nst[data_args.train_split_name] = nst_train["train"] + nst[data_args.eval_split_name] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + 
npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name: + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets[data_args.train_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets[data_args.eval_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and 
not training_args.do_predict: + raise ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # 
process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets[data_args.train_split_name]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name) + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in [data_args.test_split_name]: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + 
eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..7354bfa90aed939d148e5ca7b8f5a2b167a730e3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,190 @@ +{ + "additional_special_tokens": [ + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + 
"normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } + ], + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + 
"unk_token": "[UNK]" +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4cc035b8c547a4620a2aba67fea665613c851fc5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "bos_token": "", + "do_lower_case": false, + "eos_token": "", + "name_or_path": "./", + "pad_token": "[PAD]", + "replace_word_delimiter_char": " ", + "special_tokens_map_file": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "[UNK]", + "word_delimiter_token": "|" +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..f862690b7b4f5e3c3553e1808f5ee83577f4ed80 --- /dev/null +++ b/vocab.json @@ -0,0 +1,41 @@ +{ + "(": 1, + ")": 2, + "0": 3, + "3": 4, + "7": 5, + "8": 6, + "9": 7, + "[PAD]": 38, + "[UNK]": 37, + "a": 8, + "b": 9, + "c": 10, + "d": 11, + "e": 12, + "f": 13, + "g": 14, + "h": 15, + "i": 16, + "j": 17, + "k": 18, + "l": 19, + "m": 20, + "n": 21, + "o": 22, + "p": 23, + "q": 24, + "r": 25, + "s": 26, + "t": 27, + "u": 28, + "v": 29, + "w": 30, + "x": 31, + "y": 32, + "z": 33, + "|": 0, + "å": 34, + "æ": 35, + "ø": 36 +} diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 120000 index 0000000000000000000000000000000000000000..bd9a1bb653daf580cb0478bab6bed52227518c91 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1 @@ +run-20220730_174606-j2u4n7h4/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log new file mode 120000 index 0000000000000000000000000000000000000000..f9b9788e845bd50b9d1fd6e4856cebc1c21fd3cf --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1 @@ +run-20220730_174606-j2u4n7h4/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run new file mode 120000 index 0000000000000000000000000000000000000000..bf90ee7bb24b3a8fe5d6ffdb572406b591f10e6f --- /dev/null +++ b/wandb/latest-run @@ -0,0 +1 @@ 
+run-20220730_174606-j2u4n7h4 \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_183213-356uc50u/files/config.yaml b/wandb/run-20220729_183213-356uc50u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2e94752f5388b27318ca481df18004064ee224e --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659119533 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_183213-356uc50u/files/output.log b/wandb/run-20220729_183213-356uc50u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..13fed222a38a47168df9365eeae2cb464dba0718 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/output.log @@ -0,0 +1,253 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-32-09_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading builder script: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13.1k/13.1k [00:00<00:00, 154kB/s] +Downloading data files: 0%| | 0/9 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 802, in make_dataset + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + File "/data/flax/lib/python3.8/site-packages/datasets/load.py", line 1746, in load_dataset + builder_instance.download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 704, in download_and_prepare + self._download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1227, in _download_and_prepare + super()._download_and_prepare(dl_manager, verify_infos, check_duplicate_keys=verify_infos) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 793, in _download_and_prepare + self._prepare_split(split_generator, **prepare_split_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1218, in _prepare_split + example = self.info.features.encode_example(record) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1614, in 
encode_example + return encode_nested_example(self, example) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1165, in encode_nested_example + { + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1166, in + k: encode_nested_example(sub_schema, sub_obj, level=level + 1) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1220, in encode_nested_example + return schema.encode_example(obj) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 86, in encode_example + raise ImportError("To support encoding audio data, please install 'soundfile'.") from err +ImportError: To support encoding audio data, please install 'soundfile'. \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/requirements.txt b/wandb/run-20220729_183213-356uc50u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa5033f65271e17cdb6ed0e1630a231dcd521c75 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/requirements.txt @@ -0,0 +1,137 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 
+grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +libtpu-nightly==0.1.dev20220722 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.11.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json b/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..5ebb43cc9f3ec4f7944916acb29df29b05628c98 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:32:17.029179", + "startedAt": "2022-07-29T18:32:13.606321", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=3", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project", + "wav2vec2", + "--wandb_name", + "wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": 
"t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json b/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..05d703cd005ebc6bef1c14b739a96224e37fa421 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 256}} \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log b/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..07b19490a0e387933a9d2c98db18dbc39afc9d79 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log @@ -0,0 +1,301 @@ +2022-07-29 18:32:14,486 INFO MainThread:136862 [internal.py:wandb_internal():87] W&B internal server running at pid: 136862, started at: 2022-07-29 18:32:14.486632 +2022-07-29 18:32:14,488 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:32:14,489 INFO WriterThread:136862 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb +2022-07-29 18:32:14,489 DEBUG SenderThread:136862 [sender.py:send():234] send: header +2022-07-29 18:32:14,490 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: check_version +2022-07-29 18:32:14,527 DEBUG SenderThread:136862 [sender.py:send():234] send: run +2022-07-29 18:32:14,729 INFO SenderThread:136862 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files +2022-07-29 18:32:14,729 INFO SenderThread:136862 [sender.py:_start_run_threads():804] run started: 356uc50u with start time 1659119533 +2022-07-29 18:32:14,729 DEBUG SenderThread:136862 [sender.py:send():234] send: summary +2022-07-29 18:32:14,729 INFO 
SenderThread:136862 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:32:14,730 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:32:15,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:32:17,028 DEBUG HandlerThread:136862 [meta.py:__init__():40] meta init +2022-07-29 18:32:17,029 DEBUG HandlerThread:136862 [meta.py:__init__():54] meta init done +2022-07-29 18:32:17,029 DEBUG HandlerThread:136862 [meta.py:probe():214] probe +2022-07-29 18:32:17,030 DEBUG HandlerThread:136862 [meta.py:_setup_git():204] setup git +2022-07-29 18:32:17,062 DEBUG HandlerThread:136862 [meta.py:_setup_git():211] setup git done +2022-07-29 18:32:17,062 DEBUG HandlerThread:136862 [meta.py:_save_code():92] save code +2022-07-29 18:32:17,074 DEBUG HandlerThread:136862 [meta.py:_save_code():113] save code done +2022-07-29 18:32:17,074 DEBUG HandlerThread:136862 [meta.py:_save_patches():130] save patches +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_patches():172] save patches done +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_pip():58] save pip +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_pip():72] save pip done +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:probe():252] probe done +2022-07-29 18:32:17,193 DEBUG SenderThread:136862 [sender.py:send():234] send: files +2022-07-29 18:32:17,193 INFO SenderThread:136862 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:32:17,193 INFO SenderThread:136862 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:32:17,199 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:17,199 DEBUG 
SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:17,736 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code +2022-07-29 18:32:17,880 INFO Thread-12 :136862 [upload_job.py:push():137] Uploaded file /tmp/tmp1arbfimxwandb/oqv2t90y-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:32:18,151 INFO Thread-11 :136862 [upload_job.py:push():137] Uploaded file /tmp/tmp1arbfimxwandb/1hi0yjav-wandb-metadata.json +2022-07-29 18:32:19,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:21,738 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:23,739 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:25,740 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:27,741 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:29,742 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:31,743 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:32,337 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:32,338 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:33,744 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:35,745 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:37,746 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:39,747 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:41,748 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:43,749 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:45,108 
DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:32:45,750 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:47,497 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:47,497 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:47,751 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:49,752 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:51,753 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:53,753 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:55,754 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:57,755 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:59,756 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:01,757 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:02,633 DEBUG 
HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:02,633 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:03,758 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:05,759 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:07,760 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:09,761 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:11,761 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:13,762 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:15,183 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:33:15,763 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:17,764 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:17,772 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:17,772 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
18:33:19,765 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:21,766 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:23,767 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:25,768 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:27,769 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:29,770 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:31,771 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:32,909 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:32,909 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:33,772 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:35,773 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:37,774 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:39,775 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:41,776 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:43,777 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:45,245 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:33:45,778 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:47,779 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:48,051 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:48,051 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:49,780 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:51,781 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:53,782 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:55,783 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:57,784 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:59,785 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:01,786 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:03,192 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:03,192 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:03,786 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:05,787 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:07,788 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:09,789 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:11,790 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:13,791 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:15,308 
DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:34:15,792 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:17,793 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:18,334 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:18,334 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:19,794 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:21,795 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:23,796 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:25,797 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:27,798 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:29,799 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:31,800 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:33,472 DEBUG 
HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:33,472 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:33,801 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:35,802 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:37,803 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:39,804 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:41,805 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:43,806 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:45,381 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:34:45,807 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:47,808 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:48,609 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:48,610 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
18:34:49,809 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:51,810 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:53,811 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:55,812 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:57,813 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:59,814 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:01,815 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:03,748 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:03,748 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:03,815 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:05,816 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:07,817 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:09,818 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:11,819 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:13,820 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:15,454 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:35:15,821 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:17,822 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:18,886 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:18,886 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:19,823 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:33,829 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:34,020 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:34,021 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:35,830 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:37,831 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:39,831 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:41,832 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:43,833 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:45,525 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:35:45,834 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:47,836 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:49,158 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:49,158 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:49,837 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:51,838 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:53,839 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:55,840 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:57,841 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:59,842 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:01,843 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:03,844 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:04,296 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:36:04,296 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:36:05,845 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:07,846 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:09,846 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:11,847 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 
18:36:13,848 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:15,598 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:36:15,849 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:17,850 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:19,431 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:36:19,431 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:36:19,851 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:23,853 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:29,855 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:30,795 DEBUG SenderThread:136862 [sender.py:send():234] send: telemetry +2022-07-29 18:36:30,795 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:30,795 DEBUG SenderThread:136862 [sender.py:send():234] send: exit +2022-07-29 18:36:30,795 INFO SenderThread:136862 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:36:30,796 INFO SenderThread:136862 [sender.py:send_exit():368] handling runtime: 256 +2022-07-29 18:36:30,798 INFO SenderThread:136862 [sender.py:_save_file():939] saving file 
wandb-summary.json with policy end +2022-07-29 18:36:30,798 INFO SenderThread:136862 [sender.py:send_exit():374] send defer +2022-07-29 18:36:30,798 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:30,799 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,799 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:36:30,799 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,799 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:36:30,799 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:36:30,800 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,800 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 18:36:30,830 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,830 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:36:30,830 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:36:30,831 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:36:30,831 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,831 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:36:30,831 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,831 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:36:30,831 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:36:30,832 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: 
defer +2022-07-29 18:36:30,832 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:36:30,832 DEBUG SenderThread:136862 [sender.py:send():234] send: summary +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:36:30,832 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:36:30,832 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,833 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:36:30,833 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,833 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:36:30,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:36:30,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:30,900 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,177 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:36:31,177 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:31,178 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:31,178 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 
18:36:31,178 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:31,178 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:36:31,178 INFO SenderThread:136862 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:36:31,279 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml config.yaml +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt requirements.txt +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log output.log +2022-07-29 18:36:31,858 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json wandb-summary.json +2022-07-29 18:36:31,858 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:36:31,863 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:36:31,863 INFO 
SenderThread:136862 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:36:31,864 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:31,870 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:31,870 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:36:31,870 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:31,870 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:36:31,870 INFO SenderThread:136862 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:36:31,965 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,965 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,067 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,067 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,169 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,169 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,270 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,271 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,332 INFO Thread-13 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml +2022-07-29 18:36:32,338 INFO Thread-16 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:36:32,340 INFO Thread-14 :136862 [upload_job.py:push():137] 
Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt +2022-07-29 18:36:32,348 INFO Thread-15 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:32,372 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,372 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,473 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,474 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,548 INFO Thread-7 :136862 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:36:32,549 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:32,549 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 18:36:32,549 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:32,549 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:36:32,575 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:33,221 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:36:33,221 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:33,222 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:33,222 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:36:33,222 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:33,222 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:36:33,222 INFO 
SenderThread:136862 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:36:33,223 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:33,223 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:36:33,223 DEBUG SenderThread:136862 [sender.py:send():234] send: final +2022-07-29 18:36:33,223 DEBUG SenderThread:136862 [sender.py:send():234] send: footer +2022-07-29 18:36:33,224 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:33,224 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:36:33,323 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:33,323 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:33,323 INFO SenderThread:136862 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:36:33,628 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:36:33,629 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:36:33,630 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:36:33,630 INFO HandlerThread:136862 [handler.py:finish():731] shutting down handler +2022-07-29 18:36:34,224 INFO WriterThread:136862 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb +2022-07-29 18:36:34,627 INFO SenderThread:136862 [sender.py:finish():1070] shutting down sender +2022-07-29 18:36:34,627 INFO SenderThread:136862 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:36:34,627 INFO SenderThread:136862 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:36:34,630 INFO MainThread:136862 [internal.py:handle_exit():77] Internal process 
exited diff --git a/wandb/run-20220729_183213-356uc50u/logs/debug.log b/wandb/run-20220729_183213-356uc50u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f369ca3d2266e28987e9d86c9bf86bc0de963656 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/logs/debug.log @@ -0,0 +1,130 @@ +2022-07-29 18:32:13,607 INFO MainThread:135604 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/logs/debug.log +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():404] calling init triggers +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():460] starting backend +2022-07-29 18:32:13,608 INFO MainThread:135604 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:32:13,667 INFO MainThread:135604 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-29 18:32:13,694 INFO MainThread:135604 [backend.py:ensure_launched():221] started backend process with pid: 136862 +2022-07-29 18:32:13,698 INFO MainThread:135604 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:32:13,713 INFO MainThread:135604 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:32:13,778 INFO MainThread:135604 [wandb_init.py:init():563] communicating current version +2022-07-29 18:32:14,526 INFO MainThread:135604 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:32:14,526 INFO MainThread:135604 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:32:14,730 INFO MainThread:135604 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:32:17,197 INFO MainThread:135604 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:32:17,197 INFO MainThread:135604 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:32:17,198 INFO MainThread:135604 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:32:17,204 INFO MainThread:135604 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 18:32:17,204 INFO MainThread:135604 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:36:28,486 INFO MainThread:135604 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:36:28,502 INFO MainThread:135604 [wandb_run.py:_restore():1752] restore +2022-07-29 18:36:30,799 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:36:31,178 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:36:31,864 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 98442 +} + +2022-07-29 18:36:31,966 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 98442 +} + +2022-07-29 18:36:32,068 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,170 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,271 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,373 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,474 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:33,222 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:33,627 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} +local_info { +} + +2022-07-29 18:36:35,126 INFO MainThread:135604 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb b/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c662a838aedae5fdf972eb40e73fe0930af9ff6 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb83d82b3b2c65b07ff90b0a90d06625b3524560ee7653e99485761b1a56795 +size 73924 diff --git a/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
+check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."}
+    )
+    ctc_zero_infinity: Optional[bool] = field(
+        default=False, metadata={"help": "If True, will try to avoid the CTC loss going to infinity."}
+    )
+
+
+@flax.struct.dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    """
+
+    dataset_name: str = field(
+        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+    )
+    dataset_config_name: Optional[str] = field(
+        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    text_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
+    )
+    dataset_cache_dir: Optional[str] = field(
+        default=None, metadata={"help": "Path to cache directory for saving and loading datasets"}
+    )
+    overwrite_cache: bool = field(
+        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+    )
+    preprocessing_num_workers: Optional[int] = field(
+        default=None,
+        metadata={"help": "The number of processes to use for the preprocessing."},
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+            "value if set."
+        },
+    )
+    max_test_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
+            "value if set."
+        },
+    )
+    audio_column_name: str = field(
+        default="audio",
+        metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"},
+    )
+    text_column_name: str = field(
+        default="text",
+        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
+    )
+    max_duration_in_seconds: float = field(
+        default=20.0,
+        metadata={
+            "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
+        },
+    )
+    min_duration_in_seconds: float = field(
+        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+    )
+    max_label_length: Optional[int] = field(
+        default=512,
+        metadata={
+            "help": "The maximum total sequence length for target text after tokenization. Sequences longer "
+            "than this will be filtered."
+        },
+    )
+    min_label_length: Optional[int] = field(
+        default=2,
+        metadata={
+            "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
+            "than this will be filtered."
+        },
+    )
+    pad_input_to_multiple_of: Optional[int] = field(
+        default=32000,
+        metadata={
+            "help": "If set will pad the input sequence to a multiple of the provided value. "
+            "This is important to avoid triggering recompilations on TPU."
+        },
+    )
+    pad_target_to_multiple_of: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "If set will pad the target sequence to a multiple of the provided value. "
+            "This is important to avoid triggering recompilations on TPU."
+        },
+    )
+    preprocessing_only: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to only do data preprocessing and skip training. "
+            "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
"
+            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
+            "so that the cached datasets can consequently be loaded in distributed training"
+        },
+    )
+    train_split_name: str = field(
+        default="train",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
+        },
+    )
+    eval_split_name: str = field(
+        default="validation",
+        metadata={
+            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'validation'"
+        },
+    )
+    do_lower_case: bool = field(
+        default=True,
+        metadata={"help": "Whether the target text should be lower cased."},
+    )
+    wandb_project: str = field(
+        default="flax-speech-recognition-ctc",
+        metadata={"help": "The name of the wandb project."},
+    )
+    wandb_name: str = field(
+        default=None,
+        metadata={"help": "The name of the wandb run."},
+    )
+    wandb_job_type: str = field(
+        default="CTC",
+        metadata={"help": "The name of the wandb job type."},
+    )
+    test_split_name: str = field(
+        default="test",
+        metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"},
+    )
+    remove_punctuation: bool = field(
+        default=False, metadata={"help": "Whether or not to remove punctuation during training."}
+    )
+
+
+# @flax.struct.dataclass
+@dataclass
+class FlaxTrainingArguments(TrainingArguments):
+    precision: str = field(
+        default="full",
+        metadata={
+            "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision"
+            "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**"
+        },
+    )
+    matmul_precision: str = field(
+        default="default",
+        metadata={
+            "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit
+        next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi
+
+        return (next_phi, next_emit), (next_phi, next_emit)
+
+    xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
+    _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)
+
+    # last row needs to be updated with the last epsilon transition
+    logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
+    logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)
+
+    # extract per_seq_loss
+    one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1)  # [B, N+1]
+    per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)
+
+    if loss_reduction == "mean":
+        target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
+        loss = (per_seq_loss / target_lengths).mean()
+    elif loss_reduction == "sum":
+        loss = per_seq_loss.sum()
+    else:
+        loss = per_seq_loss
+
+    if not output_emission_dict:
+        return loss
+
+    return loss, {
+        "logalpha_phi": logalpha_phi,
+        "logalpha_emit": logalpha_emit,
+        "logprobs_phi": logprobs_phi,
+        "logprobs_emit": logprobs_emit,
+    }
+
+
+def make_dataset(seed=42):
+    # Pre-processing dataset
+    import re
+
+    def map_nst(entry):
+        text = entry["text"].lower()
+        text = text.replace("(...vær stille under dette opptaket...)", "")
+        text = re.sub('[áàâ]', 'a', text)
+        text = re.sub('[ä]', 'æ', text)
+        text = re.sub('[éèëê]', 'e', text)
+        text = re.sub('[íìïî]', 'i', text)
+        text = re.sub('[óòöô]', 'o', text)
+        text = re.sub('[ö]', 'ø', text)
+        text = re.sub('[ç]', 'c', text)
+        text = re.sub('[úùüû]', 'u', text)
+        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
+        text = re.sub('\s+', ' ', text)
+        return {"text": text}
+
+    def filter_nst(entry):
+        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
+            return False  # Too short
+        if re.match("pIW|CA", entry["type"]):
+            return False  # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_184558-17ksemgv/files/config.yaml b/wandb/run-20220729_184558-17ksemgv/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d4d0b698347efd2ad514df94f93abca922e9980 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659120358 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_184558-17ksemgv/files/output.log b/wandb/run-20220729_184558-17ksemgv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e398040d6c714dd10c8d12b43106aa83ca649fb4 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/output.log @@ -0,0 +1,125 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-45-54_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 6912.42it/s] +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3877.63it/s] +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 802, in make_dataset + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + File "/data/flax/lib/python3.8/site-packages/datasets/load.py", line 1746, in load_dataset + builder_instance.download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 704, in download_and_prepare + self._download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1227, in _download_and_prepare + super()._download_and_prepare(dl_manager, verify_infos, check_duplicate_keys=verify_infos) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 795, in 
_download_and_prepare + raise OSError( +OSError: Cannot find data file. +Original error: +sndfile library not found \ No newline at end of file diff --git a/wandb/run-20220729_184558-17ksemgv/files/requirements.txt b/wandb/run-20220729_184558-17ksemgv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..900d3d3303fe2688be1dc797f590f7b5e7fe6e22 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/requirements.txt @@ -0,0 +1,149 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 
+parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.11.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json b/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0176e229ced665dfe75d1818d378c53db5213a3e --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:46:01.736927", + "startedAt": "2022-07-29T18:45:58.536643", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + 
"--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=3", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project", + "wav2vec2", + "--wandb_name", + "wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json b/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..2bc5aefdc7f6669183104a6d63606b2dade460cd --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 30}} \ No newline at end of 
file diff --git a/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log b/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..700123cf57ecd3f1f31758c0c6a37af64dcdb384 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log @@ -0,0 +1,155 @@ +2022-07-29 18:45:59,355 INFO MainThread:151536 [internal.py:wandb_internal():87] W&B internal server running at pid: 151536, started at: 2022-07-29 18:45:59.355794 +2022-07-29 18:45:59,357 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:45:59,357 INFO WriterThread:151536 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb +2022-07-29 18:45:59,358 DEBUG SenderThread:151536 [sender.py:send():234] send: header +2022-07-29 18:45:59,358 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: check_version +2022-07-29 18:45:59,396 DEBUG SenderThread:151536 [sender.py:send():234] send: run +2022-07-29 18:45:59,569 INFO SenderThread:151536 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files +2022-07-29 18:45:59,569 INFO SenderThread:151536 [sender.py:_start_run_threads():804] run started: 17ksemgv with start time 1659120358 +2022-07-29 18:45:59,569 DEBUG SenderThread:151536 [sender.py:send():234] send: summary +2022-07-29 18:45:59,570 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:45:59,570 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:46:00,572 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:01,736 DEBUG HandlerThread:151536 [meta.py:__init__():40] meta init +2022-07-29 
18:46:01,736 DEBUG HandlerThread:151536 [meta.py:__init__():54] meta init done +2022-07-29 18:46:01,736 DEBUG HandlerThread:151536 [meta.py:probe():214] probe +2022-07-29 18:46:01,737 DEBUG HandlerThread:151536 [meta.py:_setup_git():204] setup git +2022-07-29 18:46:01,767 DEBUG HandlerThread:151536 [meta.py:_setup_git():211] setup git done +2022-07-29 18:46:01,767 DEBUG HandlerThread:151536 [meta.py:_save_code():92] save code +2022-07-29 18:46:01,778 DEBUG HandlerThread:151536 [meta.py:_save_code():113] save code done +2022-07-29 18:46:01,778 DEBUG HandlerThread:151536 [meta.py:_save_patches():130] save patches +2022-07-29 18:46:01,832 DEBUG HandlerThread:151536 [meta.py:_save_patches():172] save patches done +2022-07-29 18:46:01,832 DEBUG HandlerThread:151536 [meta.py:_save_pip():58] save pip +2022-07-29 18:46:01,833 DEBUG HandlerThread:151536 [meta.py:_save_pip():72] save pip done +2022-07-29 18:46:01,833 DEBUG HandlerThread:151536 [meta.py:probe():252] probe done +2022-07-29 18:46:01,836 DEBUG SenderThread:151536 [sender.py:send():234] send: files +2022-07-29 18:46:01,836 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:46:01,836 INFO SenderThread:151536 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:46:01,841 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:46:01,842 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:46:02,351 INFO Thread-11 :151536 [upload_job.py:push():137] Uploaded file /tmp/tmpnc48zcdrwandb/1iq31jnf-wandb-metadata.json +2022-07-29 18:46:02,540 INFO Thread-12 :151536 [upload_job.py:push():137] Uploaded file /tmp/tmpnc48zcdrwandb/18lhcemh-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json +2022-07-29 18:46:02,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt +2022-07-29 18:46:02,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code +2022-07-29 18:46:04,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:06,585 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:16,994 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:46:16,994 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:46:18,589 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:22,591 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:29,819 DEBUG SenderThread:151536 [sender.py:send():234] send: stats +2022-07-29 18:46:30,399 DEBUG HandlerThread:151536 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 18:46:30,399 DEBUG SenderThread:151536 [sender.py:send():234] send: telemetry +2022-07-29 18:46:30,400 DEBUG SenderThread:151536 [sender.py:send():234] send: exit +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:send_exit():368] handling runtime: 30 +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:46:30,401 INFO SenderThread:151536 [sender.py:send_exit():374] send defer +2022-07-29 18:46:30,401 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,401 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,401 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:46:30,402 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,402 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:46:30,402 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:46:30,402 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,402 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 18:46:30,421 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,421 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:46:30,422 DEBUG SenderThread:151536 [sender.py:send():234] send: stats +2022-07-29 18:46:30,422 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,422 INFO 
HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:46:30,422 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:46:30,423 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,423 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:46:30,423 DEBUG SenderThread:151536 [sender.py:send():234] send: summary +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:46:30,423 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:46:30,424 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,424 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:46:30,424 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,424 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:46:30,503 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,585 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:46:30,585 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,585 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
18:46:30,585 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 18:46:30,586 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,586 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:46:30,586 INFO SenderThread:151536 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml config.yaml +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt requirements.txt +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log output.log +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json wandb-summary.json +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:46:30,604 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:30,604 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:46:30,604 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,605 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:46:30,605 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,605 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:46:30,605 INFO SenderThread:151536 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:46:30,686 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,686 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,789 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,789 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,891 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,891 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,992 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,993 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,082 INFO Thread-13 :151536 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml +2022-07-29 18:46:31,094 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,094 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,107 INFO Thread-16 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:31,119 INFO Thread-15 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:31,195 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,196 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,297 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,297 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,319 INFO Thread-14 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt +2022-07-29 18:46:31,398 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,399 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,500 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,500 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,519 INFO Thread-7 :151536 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:46:31,520 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,520 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 7 
+2022-07-29 18:46:31,520 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,520 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:46:31,601 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,949 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:46:31,949 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,950 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,950 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:46:31,950 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,950 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:46:31,950 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:46:31,950 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,950 DEBUG SenderThread:151536 [sender.py:send():234] send: final +2022-07-29 18:46:31,951 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:46:31,951 DEBUG SenderThread:151536 [sender.py:send():234] send: footer +2022-07-29 18:46:31,951 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,951 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:46:32,051 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:32,051 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:32,051 INFO SenderThread:151536 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:46:32,311 
DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:46:32,312 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:46:32,312 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:46:32,312 INFO HandlerThread:151536 [handler.py:finish():731] shutting down handler +2022-07-29 18:46:32,951 INFO WriterThread:151536 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb +2022-07-29 18:46:33,310 INFO SenderThread:151536 [sender.py:finish():1070] shutting down sender +2022-07-29 18:46:33,310 INFO SenderThread:151536 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:46:33,310 INFO SenderThread:151536 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:46:33,312 INFO MainThread:151536 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_184558-17ksemgv/logs/debug.log b/wandb/run-20220729_184558-17ksemgv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..a8ce84d6f5e6e827e2c5c316e99533f60ce6d4ab --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 18:45:58,537 INFO MainThread:150277 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/logs/debug.log +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():404] calling 
init triggers +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():460] starting backend +2022-07-29 18:45:58,538 INFO MainThread:150277 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:45:58,564 INFO MainThread:150277 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 18:45:58,588 INFO MainThread:150277 [backend.py:ensure_launched():221] started backend process with pid: 151536 +2022-07-29 18:45:58,590 INFO MainThread:150277 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:45:58,602 INFO MainThread:150277 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:45:58,661 INFO MainThread:150277 [wandb_init.py:init():563] communicating current version +2022-07-29 18:45:59,394 INFO MainThread:150277 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:45:59,394 INFO MainThread:150277 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:45:59,570 INFO MainThread:150277 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:46:01,841 INFO MainThread:150277 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:46:01,841 INFO MainThread:150277 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:46:01,842 INFO MainThread:150277 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:46:01,844 INFO MainThread:150277 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 18:46:01,844 INFO MainThread:150277 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:46:28,169 INFO MainThread:150277 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:46:28,175 INFO MainThread:150277 [wandb_run.py:_restore():1752] restore +2022-07-29 18:46:30,401 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:46:30,586 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:46:30,687 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 81928 +} + +2022-07-29 18:46:30,790 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79233 + total_bytes: 81928 +} + +2022-07-29 18:46:30,892 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79260 + total_bytes: 81928 +} + +2022-07-29 18:46:30,993 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79260 + total_bytes: 81928 +} + +2022-07-29 18:46:31,095 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,196 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,298 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,399 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,501 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,950 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:32,310 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} +local_info { +} + +2022-07-29 18:46:33,786 INFO MainThread:150277 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb b/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7483c2dbcaff83cfe1f43bf70a9c2bfb08124aa2 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeafbf0c6357efcfabe27fe93edbeb5d9c48f7372da0e68e6b6b22a7548de9df +size 6320 diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 
0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml b/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23749e5e0dfae07e8573d73012b1a148268b2c54 --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659120585 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/output.log b/wandb/run-20220729_184945-3vhqjnn9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..abb7d0c55175cf5db1100ac493415dbe07a2505a --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/output.log @@ -0,0 +1,313 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-49-41_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 6694.23it/s] +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3591.01it/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Generating test split: 72763 examples [00:49, 2217.99 examples/s] + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.19it/s] +Downloading builder script: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.20k/9.20k [00:00<00:00, 7.87MB/s] +Downloading and preparing dataset npsc/16K_mp3 to 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc... +Downloading data: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51.1k/51.1k [00:00<00:00, 299kB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 498k/498k [00:00<00:00, 1.45MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 525k/525k [00:00<00:00, 1.23MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 758k/758k [00:00<00:00, 1.76MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 681k/681k [00:00<00:00, 1.58MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 852k/852k [00:00<00:00, 1.66MB/s] +Downloading data: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.60M/1.60M [00:00<00:00, 3.08MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.27M/1.27M [00:00<00:00, 2.45MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.58M/1.58M [00:00<00:00, 3.05MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.89M/1.89M [00:00<00:00, 3.17MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.40M/1.40M [00:00<00:00, 2.72MB/s] +Downloading data files #11: 0%| | 0/2 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_185656-3cqptots/files/config.yaml b/wandb/run-20220729_185656-3cqptots/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12a5489fcc147454cdebaaa0ceeac3c7c8066a92 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121016 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_185656-3cqptots/files/output.log b/wandb/run-20220729_185656-3cqptots/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cf01da2384c1193107b337f68af2be8a462fe957 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-56-52_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.04it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 451.08it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/files/requirements.txt b/wandb/run-20220729_185656-3cqptots/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..82f0601b1659d07d7747ba139250246bf7eae997 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/requirements.txt @@ -0,0 +1,150 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json b/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c865d99832a55a713188d199f8ecaa68e7e23b19 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:57:00.004046", + "startedAt": "2022-07-29T18:56:56.698418", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json b/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b1ac4f7d3564b2fd407d247e6957709faa41a169 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 9}} \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log b/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a764a1101709cb1d8e8ad512f5f6066dcb3c7111 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log @@ -0,0 +1,149 @@ +2022-07-29 18:56:57,526 INFO MainThread:165164 [internal.py:wandb_internal():87] W&B internal server running at pid: 165164, started at: 2022-07-29 18:56:57.526162 +2022-07-29 18:56:57,528 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:56:57,528 INFO WriterThread:165164 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb +2022-07-29 18:56:57,529 DEBUG SenderThread:165164 [sender.py:send():234] send: header +2022-07-29 18:56:57,529 DEBUG SenderThread:165164 
[sender.py:send_request():248] send_request: check_version +2022-07-29 18:56:57,565 DEBUG SenderThread:165164 [sender.py:send():234] send: run +2022-07-29 18:56:57,749 INFO SenderThread:165164 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files +2022-07-29 18:56:57,749 INFO SenderThread:165164 [sender.py:_start_run_threads():804] run started: 3cqptots with start time 1659121016 +2022-07-29 18:56:57,750 DEBUG SenderThread:165164 [sender.py:send():234] send: summary +2022-07-29 18:56:57,750 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:56:57,752 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:56:58,757 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:00,003 DEBUG HandlerThread:165164 [meta.py:__init__():40] meta init +2022-07-29 18:57:00,003 DEBUG HandlerThread:165164 [meta.py:__init__():54] meta init done +2022-07-29 18:57:00,004 DEBUG HandlerThread:165164 [meta.py:probe():214] probe +2022-07-29 18:57:00,005 DEBUG HandlerThread:165164 [meta.py:_setup_git():204] setup git +2022-07-29 18:57:00,034 DEBUG HandlerThread:165164 [meta.py:_setup_git():211] setup git done +2022-07-29 18:57:00,034 DEBUG HandlerThread:165164 [meta.py:_save_code():92] save code +2022-07-29 18:57:00,046 DEBUG HandlerThread:165164 [meta.py:_save_code():113] save code done +2022-07-29 18:57:00,046 DEBUG HandlerThread:165164 [meta.py:_save_patches():130] save patches +2022-07-29 18:57:00,099 DEBUG HandlerThread:165164 [meta.py:_save_patches():172] save patches done +2022-07-29 18:57:00,100 DEBUG HandlerThread:165164 [meta.py:_save_pip():58] save pip +2022-07-29 18:57:00,100 DEBUG HandlerThread:165164 [meta.py:_save_pip():72] save pip done +2022-07-29 18:57:00,100 DEBUG 
HandlerThread:165164 [meta.py:probe():252] probe done +2022-07-29 18:57:00,103 DEBUG SenderThread:165164 [sender.py:send():234] send: files +2022-07-29 18:57:00,103 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:57:00,104 INFO SenderThread:165164 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:57:00,109 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:57:00,109 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:57:00,591 INFO Thread-11 :165164 [upload_job.py:push():137] Uploaded file /tmp/tmp02y1jub5wandb/axei5139-wandb-metadata.json +2022-07-29 18:57:00,757 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code +2022-07-29 18:57:00,864 INFO Thread-12 :165164 [upload_job.py:push():137] Uploaded file /tmp/tmp02y1jub5wandb/ld126f6g-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:02,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:04,759 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:06,760 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:07,150 DEBUG SenderThread:165164 [sender.py:send():234] send: telemetry +2022-07-29 18:57:07,150 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,151 DEBUG SenderThread:165164 [sender.py:send():234] send: exit +2022-07-29 18:57:07,151 INFO SenderThread:165164 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:57:07,151 INFO SenderThread:165164 [sender.py:send_exit():368] handling runtime: 9 +2022-07-29 18:57:07,152 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:57:07,152 INFO SenderThread:165164 [sender.py:send_exit():374] send defer +2022-07-29 18:57:07,152 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,153 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,153 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:57:07,153 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,153 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:57:07,153 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:57:07,153 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,153 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle 
defer: 1 +2022-07-29 18:57:07,230 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,231 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:57:07,231 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:57:07,231 DEBUG SenderThread:165164 [sender.py:send():234] send: stats +2022-07-29 18:57:07,232 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,232 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:57:07,232 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,232 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:57:07,232 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:57:07,232 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,232 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:57:07,232 DEBUG SenderThread:165164 [sender.py:send():234] send: summary +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:57:07,233 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:57:07,233 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,233 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:57:07,234 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer 
+2022-07-29 18:57:07,234 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:57:07,254 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,439 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:57:07,439 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,439 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,439 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 18:57:07,440 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,440 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:57:07,440 INFO SenderThread:165164 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:57:07,540 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,760 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml config.yaml +2022-07-29 
18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt requirements.txt +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log output.log +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json wandb-summary.json +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:57:07,765 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:07,765 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:57:07,765 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,766 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,766 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:57:07,768 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,768 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:57:07,768 INFO SenderThread:165164 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:57:07,867 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,867 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,968 DEBUG 
HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,968 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,070 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,070 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,171 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,171 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,235 INFO Thread-16 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:08,266 INFO Thread-13 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml +2022-07-29 18:57:08,268 INFO Thread-15 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:08,273 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,273 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,324 INFO Thread-14 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt +2022-07-29 18:57:08,374 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,374 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,476 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,476 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,524 INFO 
Thread-7 :165164 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:57:08,524 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,524 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 18:57:08,525 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,525 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:57:08,577 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:57:08,677 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,677 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,677 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:57:08,677 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send():234] send: final +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send():234] send: footer +2022-07-29 18:57:08,678 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,678 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,678 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:57:08,778 DEBUG HandlerThread:165164 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,778 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,778 INFO SenderThread:165164 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:57:09,031 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:57:09,032 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:57:09,032 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:57:09,032 INFO HandlerThread:165164 [handler.py:finish():731] shutting down handler +2022-07-29 18:57:09,678 INFO WriterThread:165164 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb +2022-07-29 18:57:10,030 INFO SenderThread:165164 [sender.py:finish():1070] shutting down sender +2022-07-29 18:57:10,030 INFO SenderThread:165164 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:57:10,030 INFO SenderThread:165164 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:57:10,033 INFO MainThread:165164 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_185656-3cqptots/logs/debug.log b/wandb/run-20220729_185656-3cqptots/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..9068a1c0745ef93961abd3bfbf0149df54529872 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 18:56:56,699 INFO MainThread:163875 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:_log_setup():371] Logging user logs to 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/logs/debug.log +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():404] calling init triggers +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():460] starting backend +2022-07-29 18:56:56,700 INFO MainThread:163875 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:56:56,728 INFO MainThread:163875 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 18:56:56,753 INFO MainThread:163875 [backend.py:ensure_launched():221] started backend process with pid: 165164 +2022-07-29 18:56:56,754 INFO MainThread:163875 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:56:56,770 INFO MainThread:163875 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:56:56,833 INFO MainThread:163875 [wandb_init.py:init():563] communicating current version +2022-07-29 18:56:57,563 INFO MainThread:163875 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:56:57,564 INFO MainThread:163875 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:56:57,751 INFO MainThread:163875 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:57:00,108 INFO MainThread:163875 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:57:00,108 INFO MainThread:163875 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:57:00,109 INFO MainThread:163875 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:57:00,111 INFO MainThread:163875 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 18:57:00,111 INFO MainThread:163875 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:57:04,664 INFO MainThread:163875 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:57:04,668 INFO MainThread:163875 [wandb_run.py:_restore():1752] restore +2022-07-29 18:57:07,153 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 18:57:07,439 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 18:57:07,766 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85482 +} + +2022-07-29 18:57:07,867 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85482 +} + +2022-07-29 18:57:07,969 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,071 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,172 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,273 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,375 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,476 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,677 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:09,030 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} +local_info { +} + +2022-07-29 18:57:10,492 INFO MainThread:163875 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb b/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..d7154a03bc448ca82531e4cd650ba066904bae17 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375e2b86666767f13b3eeedcc308447a00f857d25fdb68663f6847ba93879a70 +size 9434 diff --git a/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190139-3h19ae7o/files/config.yaml b/wandb/run-20220729_190139-3h19ae7o/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebac1590d3ae1282df9c04e12acc5c399dcd15ce --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121299 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190139-3h19ae7o/files/output.log b/wandb/run-20220729_190139-3h19ae7o/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..65b7121bb800f0470c1c62bbb9888355968ba245 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-01-35_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 85.67it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 450.21it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt b/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f28978ffa0f0a8356a51ec97122622a6097b70e8 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json b/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8f1072c53903ffa2d648d83b22eae0a97ba719b2 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:01:42.561481", + "startedAt": "2022-07-29T19:01:39.254799", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json b/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log b/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..70f5454fc7a56b910ebe3cd1e860e3f4857833a0 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log @@ -0,0 +1,148 @@ +2022-07-29 19:01:40,118 INFO MainThread:171545 [internal.py:wandb_internal():87] W&B internal server running at pid: 171545, started at: 2022-07-29 19:01:40.117863 +2022-07-29 19:01:40,120 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:01:40,120 INFO WriterThread:171545 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb +2022-07-29 19:01:40,121 DEBUG SenderThread:171545 [sender.py:send():234] send: header +2022-07-29 19:01:40,121 DEBUG SenderThread:171545 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:01:40,160 DEBUG SenderThread:171545 [sender.py:send():234] send: run +2022-07-29 19:01:40,329 INFO SenderThread:171545 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files +2022-07-29 19:01:40,329 INFO SenderThread:171545 [sender.py:_start_run_threads():804] run started: 3h19ae7o with start time 1659121299 +2022-07-29 19:01:40,329 DEBUG SenderThread:171545 [sender.py:send():234] send: summary +2022-07-29 19:01:40,329 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:40,330 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:01:41,331 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:__init__():40] meta init +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:__init__():54] meta init done +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:probe():214] probe +2022-07-29 19:01:42,563 DEBUG HandlerThread:171545 [meta.py:_setup_git():204] setup git +2022-07-29 19:01:42,595 DEBUG HandlerThread:171545 [meta.py:_setup_git():211] setup git done +2022-07-29 19:01:42,595 DEBUG HandlerThread:171545 [meta.py:_save_code():92] save code +2022-07-29 19:01:42,606 DEBUG HandlerThread:171545 [meta.py:_save_code():113] save code done +2022-07-29 19:01:42,607 DEBUG HandlerThread:171545 [meta.py:_save_patches():130] save patches +2022-07-29 19:01:42,661 DEBUG HandlerThread:171545 [meta.py:_save_patches():172] save patches done +2022-07-29 19:01:42,661 DEBUG HandlerThread:171545 [meta.py:_save_pip():58] save pip +2022-07-29 19:01:42,662 DEBUG HandlerThread:171545 [meta.py:_save_pip():72] save pip done +2022-07-29 19:01:42,662 DEBUG 
HandlerThread:171545 [meta.py:probe():252] probe done +2022-07-29 19:01:42,665 DEBUG SenderThread:171545 [sender.py:send():234] send: files +2022-07-29 19:01:42,665 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:01:42,666 INFO SenderThread:171545 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:01:42,671 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:01:42,672 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:01:43,146 INFO Thread-11 :171545 [upload_job.py:push():137] Uploaded file /tmp/tmpw0fdeimbwandb/1v9fa8k6-wandb-metadata.json +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code +2022-07-29 19:01:43,355 INFO Thread-12 :171545 [upload_job.py:push():137] Uploaded file /tmp/tmpw0fdeimbwandb/2vqa3jvn-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:45,335 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:47,335 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:49,207 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:49,336 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:49,737 DEBUG SenderThread:171545 [sender.py:send():234] send: telemetry +2022-07-29 19:01:49,737 DEBUG SenderThread:171545 [sender.py:send():234] send: exit +2022-07-29 19:01:49,739 INFO SenderThread:171545 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:01:49,739 INFO SenderThread:171545 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:01:49,740 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:49,740 INFO SenderThread:171545 [sender.py:send_exit():374] send defer +2022-07-29 19:01:49,740 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:49,741 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,741 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:01:49,741 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,741 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:01:49,741 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:01:49,741 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,742 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle 
defer: 1 +2022-07-29 19:01:49,792 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:01:49,793 DEBUG SenderThread:171545 [sender.py:send():234] send: stats +2022-07-29 19:01:49,793 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,793 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:01:49,793 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:01:49,794 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,794 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:01:49,794 DEBUG SenderThread:171545 [sender.py:send():234] send: summary +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:49,794 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:01:49,795 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,795 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:01:49,795 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer 
+2022-07-29 19:01:49,795 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:01:49,842 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:49,961 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:01:49,961 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:49,962 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,962 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:01:49,962 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,962 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:01:49,962 INFO SenderThread:171545 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:01:50,063 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,337 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:50,337 INFO SenderThread:171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml config.yaml +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt requirements.txt 
+2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log output.log +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json wandb-summary.json +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:01:50,339 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:50,339 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:01:50,339 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,344 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:50,344 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:01:50,345 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:50,345 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:01:50,345 INFO SenderThread:171545 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:01:50,446 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,446 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,548 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,548 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit 
+2022-07-29 19:01:50,650 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,650 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,752 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,752 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,829 INFO Thread-13 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml +2022-07-29 19:01:50,839 INFO Thread-15 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:50,854 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,854 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,862 INFO Thread-16 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:50,877 INFO Thread-14 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt +2022-07-29 19:01:50,955 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,956 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,057 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,058 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,078 INFO Thread-7 :171545 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:01:51,078 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
19:01:51,078 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:01:51,078 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,078 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:01:51,159 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,533 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:01:51,533 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,534 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:51,534 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:01:51,534 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,534 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:01:51,534 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send():234] send: final +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send():234] send: footer +2022-07-29 19:01:51,535 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:51,535 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,536 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:01:51,635 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,636 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,636 INFO 
SenderThread:171545 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:01:51,944 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:01:51,945 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:01:51,946 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:01:51,946 INFO HandlerThread:171545 [handler.py:finish():731] shutting down handler +2022-07-29 19:01:52,536 INFO WriterThread:171545 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb +2022-07-29 19:01:52,943 INFO SenderThread:171545 [sender.py:finish():1070] shutting down sender +2022-07-29 19:01:52,943 INFO SenderThread:171545 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:01:52,943 INFO SenderThread:171545 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:01:52,945 INFO MainThread:171545 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190139-3h19ae7o/logs/debug.log b/wandb/run-20220729_190139-3h19ae7o/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6c6e24bdd427c5deafb542d5aaee9fb2c9cb530b --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/logs/debug.log +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:_log_setup():372] Logging internal logs to 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():460] starting backend +2022-07-29 19:01:39,256 INFO MainThread:170288 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:01:39,285 INFO MainThread:170288 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:01:39,312 INFO MainThread:170288 [backend.py:ensure_launched():221] started backend process with pid: 171545 +2022-07-29 19:01:39,314 INFO MainThread:170288 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:01:39,327 INFO MainThread:170288 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:01:39,390 INFO MainThread:170288 [wandb_init.py:init():563] communicating current version +2022-07-29 19:01:40,159 INFO MainThread:170288 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:01:40,159 INFO MainThread:170288 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:01:40,330 INFO MainThread:170288 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:01:42,671 INFO MainThread:170288 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:01:42,671 INFO MainThread:170288 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:01:42,672 INFO MainThread:170288 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:01:42,674 INFO MainThread:170288 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 19:01:42,674 INFO MainThread:170288 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:01:47,172 INFO MainThread:170288 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:01:47,176 INFO MainThread:170288 [wandb_run.py:_restore():1752] restore +2022-07-29 19:01:49,741 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:01:49,962 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:01:50,345 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:01:50,447 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:01:50,549 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,651 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,753 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,855 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,956 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,058 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,534 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,943 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} +local_info { +} + +2022-07-29 19:01:53,465 INFO MainThread:170288 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb b/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb new file mode 100644 index 0000000000000000000000000000000000000000..613942396072c70d70b572e40bd0ee85621fff3f --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d731128588ab4151c70d7ea1d808dec40df947170760cc941be5a80239eb9f9 +size 9438 diff --git a/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# 
Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is 
not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
    # --- tail of an enclosing length-grouping sampler (its `def` is above this chunk) ---
    # Since each megabatch is sorted by descending length, the longest element is the first
    megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
    max_idx = np.argmax(megabatch_maximums).item()
    # Switch to put the longest batch in first position
    # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
    megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]

    # Flatten the list of megabatches back into a single 1-D index array.
    megabatches = np.array([i for megabatch in megabatches for i in megabatch])

    return megabatches


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
    the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
    num_samples = len(samples_idx)
    if drop_last:
        # Trim the trailing partial batch, then reshape into (num_batches, batch_size).
        samples_to_remove = num_samples % batch_size
        if samples_to_remove != 0:
            samples_idx = samples_idx[:-samples_to_remove]
        sections_split = num_samples // batch_size
        samples_idx = samples_idx.reshape((sections_split, batch_size))
    else:
        # Keep the partial batch: np.array_split returns a list of (possibly ragged) arrays.
        sections_split = math.ceil(num_samples / batch_size)
        samples_idx = np.array_split(samples_idx, sections_split)
    return samples_idx


def write_train_metric(summary_writer, train_metrics, train_time, step):
    """Write accumulated per-step training metrics to a TensorBoard summary writer.

    `train_metrics` is a list of per-step metric dicts; `get_metrics` stacks them so
    each key maps to a sequence of values, which are logged at their original steps.
    """
    summary_writer.scalar("train_time", train_time, step)

    train_metrics = get_metrics(train_metrics)
    for key, vals in train_metrics.items():
        tag = f"train_{key}"
        for i, val in enumerate(vals):
            # `step - len(vals) + i + 1` back-dates each value to the step it was produced at.
            summary_writer.scalar(tag, val, step - len(vals) + i + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    """Write evaluation metrics (and optionally the decoded predictions) to TensorBoard."""
    for metric_name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{metric_name}", value, step)

    if pred_str is not None:
        # write output actual predictions for debugging
        summary_writer.text("eval_predictions", "\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a metrics dict to Weights & Biases (process 0 only).

    Keys containing "layer" get a trailing "/" so wandb groups the per-layer
    norms into their own panel section; other keys are namespaced by `prefix`.
    """
    if jax.process_index() == 0:
        log_metrics = {}
        for k, v in metrics.items():
            if "layer" in k:
                log_metrics[f"{k}/"] = v
            elif prefix is not None:
                log_metrics[f"{prefix}/{k}"] = v
            else:
                log_metrics[k] = v
        wandb.log(log_metrics, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) pairs to a wandb Table (process 0 only)."""
    if jax.process_index() == 0:
        # convert str data to a wandb compatible format
        str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
        # we'll log the first 50 predictions for each epoch
        wandb.log(
            {
                f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                    columns=["label_str", "pred_str"], data=str_data[:num_log]
                )
            },
            step,
        )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Returns a linear warmup, linear_decay learning rate function."""
    warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay_fn = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    # Warmup for the first `num_warmup_steps`, then decay linearly to zero.
    schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
    return schedule_fn


def ctc_loss(
    logits,
    logits_attention_mask,
    labels,
    blank_id,
    loss_reduction="mean",
    output_emission_dict=False,
    log_epsilon=-100000.0,
):
    """Computes CTC loss.
    This function performs forward computation over an FSA with `N * 2` states
    where `N` is the max number of labels. The states are split into two groups:
    Phi states and emission states. a phi-state accepts repetition of
    phi (blank)-symbols and transits to emission state when the correct label is
    observed. An emission state accepts repetition of the label and transits to
    the next phi states at any time (so called epsilon-transition).
    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
    and `N` denotes the time steps in `labels`.
    Args:
      logits: (B, T, K)-array containing log-probabilities of each class.
      logitpaddings: (B, T)-array. Padding indicators for `logits`.
      labels: (B, N)-array containing reference integer labels.
      labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently,
        `labels` must be right-padded, i.e. each row of `labelpaddings` must be
        repetition of zeroes, followed by repetition of ones.
      blank_id: Id for blank token.
      loss_reduction: one of "mean", "sum", "default"
        - "none": no reduction is applied.
        - "mean": output loss will be divided by target lengths and then the
          mean over the batch is taken.
        - "sum": output loss are summed over batch
      output_emission_dict: whether to output additional information about the emission probs
    Returns:
      A pair of `(per_seq_loss, aux)`.
      per_seq_loss:
        (B,)-array containing loss values for each sequence in the batch.
      aux: Dictionary containing interim variables used for computing losses.
        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
          phi-state corresponding to the n-th label.
        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
          emission-state corresponding to the n-th label.
        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
          corresponding to each time frame.
        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
          corresponding to each time frame.
    """
    # label paddings are indicated by -100
    labelpaddings = labels < 0
    # logit paddings are the inverse of attention_mask
    logitpaddings = ~logits_attention_mask

    # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py
    batchsize, unused_maxinputlen, num_classes = logits.shape
    batchsize_, maxlabellen = labels.shape

    logprobs = jax.nn.log_softmax(logits)
    # Effective (unpadded) label length per sequence.
    labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32)

    # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1].
    repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32)
    repeat = jnp.pad(repeat, ((0, 0), (0, 1)))

    logprobs_phi = logprobs[:, :, blank_id : blank_id + 1]  # [B, T, 1]
    logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2))  # [T, B, 1]

    one_hot = jax.nn.one_hot(labels, num_classes=num_classes)  # [B, N, K]
    logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot)
    logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2))  # [T, B, N]

    # Forward variables initialised to log(0) ~= log_epsilon, except the start state.
    logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon  # [B, N]
    logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0)
    logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon  # [B, N]

    def loop_body(prev, x):
        # One time-step of the CTC forward recursion, scanned over T.
        prev_phi, prev_emit = prev
        # emit-to-phi epsilon transition, except if the next label is repetition
        prev_phi_orig = prev_phi
        prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat))

        logprob_emit, logprob_phi, pad = x

        # phi-to-emit transition
        next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit)
        # self-loop transition
        next_phi = prev_phi + logprob_phi
        # emit-to-phi blank transition only when the next label is repetition
        next_phi = next_phi.at[:, 1:].set(
            jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat))
        )

        # For padded frames (pad == 1), carry the previous state through unchanged.
        pad = pad.reshape((batchsize, 1))
        next_emit = pad * prev_emit + (1.0 - pad) * next_emit
        next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi

        return (next_phi, next_emit), (next_phi, next_emit)

    xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
    _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)

    # last row needs to be updated with the last epsilon transition
    logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
    logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)

    # extract per_seq_loss
    one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1)  # [B, N+1]
    per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)

    if loss_reduction == "mean":
        target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
        loss = (per_seq_loss / target_lengths).mean()
    elif loss_reduction == "sum":
        loss = per_seq_loss.sum()
    else:
        loss = per_seq_loss

    if not output_emission_dict:
        return loss

    return loss, {
        "logalpha_phi": logalpha_phi,
        "logalpha_emit": logalpha_emit,
        "logprobs_phi": logprobs_phi,
        "logprobs_emit": logprobs_emit,
    }


def make_dataset(seed=42):
    """Build the combined NST + NPSC Norwegian ASR dataset.

    Normalises transcripts, filters out too-short/unsuitable samples, splits NST
    with the same train/validation ratio as NPSC, and interleaves the two corpora
    weighted by their sizes. Returns a `datasets.DatasetDict` with "train",
    "validation" and "test" splits containing only "text" and "audio" columns.

    NOTE(review): several `re.sub('', ...)` patterns below are empty strings —
    the original angle-bracket special tokens (e.g. hesitation tags) appear to
    have been stripped by diff/HTML tooling; confirm against the original file.
    """
    # Pre-processing dataset
    import re

    def map_nst(entry):
        # Lowercase and fold accented characters to the Norwegian alphabet.
        text = entry["text"].lower()
        text = text.replace("(...vær stille under dette opptaket...)", "")
        text = re.sub('[áàâ]', 'a', text)
        text = re.sub('[ä]', 'æ', text)
        text = re.sub('[éèëê]', 'e', text)
        text = re.sub('[íìïî]', 'i', text)
        text = re.sub('[óòöô]', 'o', text)
        text = re.sub('[ö]', 'ø', text)
        text = re.sub('[ç]', 'c', text)
        text = re.sub('[úùüû]', 'u', text)
        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
        text = re.sub('\s+', ' ', text)
        return {"text": text}

    def filter_nst(entry):
        # Require at least ~1 label char per 320 audio samples and 3+ chars of text.
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # NOTE(review): re.match(pattern, string) — the arguments here look swapped;
        # this matches the *type field as a pattern* against the literal "pIW|CA".
        # Presumably it should be re.match("pIW|CA", entry["type"]). Confirm intent.
        if re.match(entry["type"], "pIW|CA"):
            return False  # Spelling out words
        return True

    def filter_npsc(entry):
        # False if there are digits in the text
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        if re.search("\d", entry["text"]):
            return False
        return True

    def map_npsc(entry):
        # Same accent folding as NST, plus replacement of special hesitation/noise tokens.
        batch = {"text": entry["text"].lower()}
        batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
        batch["text"] = re.sub('[ä]', 'æ', batch["text"])
        batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
        batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
        batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
        batch["text"] = re.sub('[ö]', 'ø', batch["text"])
        batch["text"] = re.sub('[ç]', 'c', batch["text"])
        batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
        batch["text"] = re.sub('\s', ' ', batch["text"])
        # NOTE(review): the empty first arguments below are almost certainly stripped
        # angle-bracket tokens (e.g. <ee>, <qq>, <mm>, <inaudible>); as written these
        # empty patterns would insert the replacement between every character.
        batch["text"] = re.sub('', 'eee', batch["text"])
        batch["text"] = re.sub('', 'qqq', batch["text"])
        batch["text"] = re.sub('', 'mmm', batch["text"])
        batch["text"] = re.sub('', 'xxx', batch["text"])
        # batch["text"] = re.sub('', '?', batch["text"])
        if "<" in batch["text"]:
            # Any remaining angle bracket means an unhandled special token slipped through.
            raise ValueError(batch["text"])
        return batch

    nst = datasets.load_dataset("NbAiLab/NST", "no-close")
    npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
    # TODO NST_hesitate

    split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"]))  # Use same train/val ratio as NPSC
    nst_train = nst["train"].train_test_split(train_size=split, seed=seed)
    nst["train"] = nst_train["train"]
    nst["validation"] = nst_train["test"]

    nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed)
    npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed)

    # Keep only the columns shared by both corpora so they can be interleaved.
    npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]])
    nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]])

    combined = {}
    # NOTE(review): `split` (the float ratio above) is shadowed by the loop variable here.
    for split in "train", "validation", "test":
        probs = np.array([len(nst_base[split]), len(npsc_base[split])])  # Weight by number of examples
        probs = (probs / probs.sum()).tolist()
        comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
        combined[split] = comb

    return datasets.DatasetDict(**combined)


def main():
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # 2. Setup logging
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    # Set the verbosity to info of the Transformers logger.
    # We only want one process per machine to log things on the screen.
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190441-29m5vt4h/files/config.yaml b/wandb/run-20220729_190441-29m5vt4h/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..538fbcf8db2bff202565b523a13c42281ec62c2c --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121481 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190441-29m5vt4h/files/output.log b/wandb/run-20220729_190441-29m5vt4h/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b8772249f7609f1f06768913439ff58ee5f273c2 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-04-37_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 83.41it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 468.45it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt b/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f28978ffa0f0a8356a51ec97122622a6097b70e8 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json b/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..922996f7b39924b8a9d6e7dbb0ffdb069ca569aa --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:04:44.947822", + "startedAt": "2022-07-29T19:04:41.750886", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json b/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log b/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4fbe4426a20a69a5ad22e1918dfe03cc516b30ea --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log @@ -0,0 +1,170 @@ +2022-07-29 19:04:42,573 INFO MainThread:176849 [internal.py:wandb_internal():87] W&B internal server running at pid: 176849, started at: 2022-07-29 19:04:42.573053 +2022-07-29 19:04:42,575 INFO WriterThread:176849 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb +2022-07-29 19:04:42,575 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:04:42,576 DEBUG SenderThread:176849 [sender.py:send():234] send: header +2022-07-29 19:04:42,576 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:04:42,610 DEBUG SenderThread:176849 [sender.py:send():234] send: run +2022-07-29 19:04:42,771 INFO SenderThread:176849 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files +2022-07-29 19:04:42,771 INFO SenderThread:176849 [sender.py:_start_run_threads():804] run started: 29m5vt4h with start time 1659121481 +2022-07-29 19:04:42,772 DEBUG SenderThread:176849 [sender.py:send():234] send: summary +2022-07-29 19:04:42,772 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:42,773 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:04:43,773 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:__init__():40] meta init +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:__init__():54] meta init done +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:probe():214] probe +2022-07-29 19:04:44,948 DEBUG HandlerThread:176849 [meta.py:_setup_git():204] setup git +2022-07-29 19:04:44,978 DEBUG HandlerThread:176849 [meta.py:_setup_git():211] setup git done +2022-07-29 19:04:44,978 DEBUG HandlerThread:176849 [meta.py:_save_code():92] save code +2022-07-29 19:04:44,989 DEBUG HandlerThread:176849 [meta.py:_save_code():113] save code done +2022-07-29 19:04:44,989 DEBUG HandlerThread:176849 [meta.py:_save_patches():130] save patches +2022-07-29 19:04:45,043 DEBUG HandlerThread:176849 [meta.py:_save_patches():172] save patches done +2022-07-29 19:04:45,044 DEBUG HandlerThread:176849 [meta.py:_save_pip():58] save pip +2022-07-29 19:04:45,044 DEBUG HandlerThread:176849 [meta.py:_save_pip():72] save pip done +2022-07-29 19:04:45,044 DEBUG 
HandlerThread:176849 [meta.py:probe():252] probe done +2022-07-29 19:04:45,047 DEBUG SenderThread:176849 [sender.py:send():234] send: files +2022-07-29 19:04:45,047 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:04:45,047 INFO SenderThread:176849 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:04:45,053 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:04:45,053 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:04:45,512 INFO Thread-11 :176849 [upload_job.py:push():137] Uploaded file /tmp/tmp8vey1y7dwandb/10dh41rh-wandb-metadata.json +2022-07-29 19:04:45,767 INFO Thread-12 :176849 [upload_job.py:push():137] Uploaded file /tmp/tmp8vey1y7dwandb/31d5qx94-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code +2022-07-29 19:04:47,794 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:49,794 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:51,589 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:51,589 DEBUG SenderThread:176849 [sender.py:send():234] send: telemetry +2022-07-29 19:04:51,589 DEBUG SenderThread:176849 [sender.py:send():234] send: exit +2022-07-29 19:04:51,589 INFO SenderThread:176849 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:04:51,589 INFO SenderThread:176849 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:04:51,590 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:51,590 INFO SenderThread:176849 [sender.py:send_exit():374] send defer +2022-07-29 19:04:51,590 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:51,591 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,591 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:04:51,591 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,591 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:04:51,591 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:04:51,591 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,591 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:04:51,676 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,676 INFO SenderThread:176849 
[sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:04:51,676 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:04:51,676 DEBUG SenderThread:176849 [sender.py:send():234] send: stats +2022-07-29 19:04:51,677 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,677 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:04:51,677 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,677 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:04:51,677 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:04:51,677 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,677 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:04:51,677 DEBUG SenderThread:176849 [sender.py:send():234] send: summary +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:51,678 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:04:51,678 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,678 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:04:51,678 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:04:51,692 DEBUG HandlerThread:176849 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:51,795 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:51,795 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:51,839 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:04:51,839 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:51,839 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,840 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:04:51,840 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,840 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:04:51,840 INFO SenderThread:176849 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:04:51,940 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:52,796 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml config.yaml +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt 
requirements.txt +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log output.log +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json wandb-summary.json +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:04:52,802 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:52,802 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:04:52,802 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:52,804 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:52,804 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:04:52,808 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:52,808 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:04:52,808 INFO SenderThread:176849 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:04:52,904 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:52,905 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,006 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,006 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: 
poll_exit +2022-07-29 19:04:53,108 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,108 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,209 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,209 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,259 INFO Thread-15 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:53,272 INFO Thread-16 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:53,276 INFO Thread-13 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml +2022-07-29 19:04:53,311 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,311 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,412 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,412 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,514 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,514 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,615 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,616 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,717 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,717 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,818 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,819 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,920 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,920 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,021 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,021 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,123 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,123 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,224 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,225 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,326 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,326 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,428 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,428 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,455 INFO Thread-14 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt +2022-07-29 19:04:54,529 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,529 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,631 
DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,631 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,655 INFO Thread-7 :176849 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:04:54,656 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,656 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:04:54,656 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,656 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:04:54,732 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,792 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:04:54,792 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,792 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,792 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:04:54,792 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 [sender.py:send():234] send: final +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 [sender.py:send():234] send: footer +2022-07-29 19:04:54,793 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,793 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:04:54,893 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,893 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,893 INFO SenderThread:176849 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:04:55,167 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:04:55,168 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:04:55,168 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:04:55,168 INFO HandlerThread:176849 [handler.py:finish():731] shutting down handler +2022-07-29 19:04:55,794 INFO WriterThread:176849 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb +2022-07-29 19:04:56,166 INFO SenderThread:176849 [sender.py:finish():1070] shutting down sender +2022-07-29 19:04:56,166 INFO SenderThread:176849 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:04:56,167 INFO SenderThread:176849 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:04:56,169 INFO MainThread:176849 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190441-29m5vt4h/logs/debug.log b/wandb/run-20220729_190441-29m5vt4h/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..41baa6d8dbc1c310a3d6449dd7be2baa482e3b76 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/logs/debug.log @@ -0,0 +1,238 @@ +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:04:41,752 INFO MainThread:175598 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/logs/debug.log +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():460] starting backend +2022-07-29 19:04:41,752 INFO MainThread:175598 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:04:41,779 INFO MainThread:175598 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:04:41,803 INFO MainThread:175598 [backend.py:ensure_launched():221] started backend process with pid: 176849 +2022-07-29 19:04:41,805 INFO MainThread:175598 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:04:41,818 INFO MainThread:175598 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:04:41,879 INFO MainThread:175598 [wandb_init.py:init():563] communicating current version +2022-07-29 19:04:42,609 INFO MainThread:175598 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:04:42,609 INFO MainThread:175598 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:04:42,773 INFO MainThread:175598 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:04:45,051 INFO MainThread:175598 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:04:45,051 INFO MainThread:175598 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:04:45,052 INFO MainThread:175598 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:04:45,054 INFO MainThread:175598 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:04:45,054 INFO MainThread:175598 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:04:49,520 INFO MainThread:175598 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:04:49,524 INFO MainThread:175598 [wandb_run.py:_restore():1752] restore +2022-07-29 19:04:51,591 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:04:51,840 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:04:52,803 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:04:52,905 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:04:53,007 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,108 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,210 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,311 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,413 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,515 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,616 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,718 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,819 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,921 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,022 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,124 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,225 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,327 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,428 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,530 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,631 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,792 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:55,167 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} +local_info { +} + +2022-07-29 19:04:56,631 INFO MainThread:175598 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb b/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7481eac19a668d04a92b8cff4911d29fe22c8dbc --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08406c1386cc17829bf56a85d38f767a77c48664b70f890e428b1b550ee7ef7 +size 9434 diff --git a/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190622-1kplw9z9/files/config.yaml b/wandb/run-20220729_190622-1kplw9z9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfdbfc2ca4b438beb573c675e79bdc0bafd680cd --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121582 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190622-1kplw9z9/files/output.log b/wandb/run-20220729_190622-1kplw9z9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b071e9229f06da56854b13cc2606bd0a86712a3b --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-06-17_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.67it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 459.65it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt b/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json b/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8d0b28461c5a702b9121f8ed0b59655e7e5df687 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:06:25.890824", + "startedAt": "2022-07-29T19:06:22.537403", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json b/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log b/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4823c409a52b35a9998820cbbaf9675a4b616a49 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log @@ -0,0 +1,149 @@ +2022-07-29 19:06:23,361 INFO MainThread:179745 [internal.py:wandb_internal():87] W&B internal server running at pid: 179745, started at: 2022-07-29 19:06:23.361487 +2022-07-29 19:06:23,363 INFO WriterThread:179745 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb +2022-07-29 19:06:23,364 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:06:23,364 DEBUG SenderThread:179745 [sender.py:send():234] send: header +2022-07-29 19:06:23,365 DEBUG SenderThread:179745 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:06:23,400 DEBUG SenderThread:179745 [sender.py:send():234] send: run +2022-07-29 19:06:23,577 INFO SenderThread:179745 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files +2022-07-29 19:06:23,577 INFO SenderThread:179745 [sender.py:_start_run_threads():804] run started: 1kplw9z9 with start time 1659121582 +2022-07-29 19:06:23,578 DEBUG SenderThread:179745 [sender.py:send():234] send: summary +2022-07-29 19:06:23,578 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:23,579 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:06:24,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:__init__():40] meta init +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:__init__():54] meta init done +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:probe():214] probe +2022-07-29 19:06:25,892 DEBUG HandlerThread:179745 [meta.py:_setup_git():204] setup git +2022-07-29 19:06:25,927 DEBUG HandlerThread:179745 [meta.py:_setup_git():211] setup git done +2022-07-29 19:06:25,927 DEBUG HandlerThread:179745 [meta.py:_save_code():92] save code +2022-07-29 19:06:25,940 DEBUG HandlerThread:179745 [meta.py:_save_code():113] save code done +2022-07-29 19:06:25,940 DEBUG HandlerThread:179745 [meta.py:_save_patches():130] save patches +2022-07-29 19:06:25,995 DEBUG HandlerThread:179745 [meta.py:_save_patches():172] save patches done +2022-07-29 19:06:25,995 DEBUG HandlerThread:179745 [meta.py:_save_pip():58] save pip +2022-07-29 19:06:25,996 DEBUG HandlerThread:179745 [meta.py:_save_pip():72] save pip done +2022-07-29 19:06:25,996 DEBUG 
HandlerThread:179745 [meta.py:probe():252] probe done +2022-07-29 19:06:26,000 DEBUG SenderThread:179745 [sender.py:send():234] send: files +2022-07-29 19:06:26,000 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:06:26,000 INFO SenderThread:179745 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:06:26,006 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:06:26,009 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:06:26,473 INFO Thread-11 :179745 [upload_job.py:push():137] Uploaded file /tmp/tmpwan7wg8uwandb/2j2ps7it-wandb-metadata.json +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:26,580 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code +2022-07-29 19:06:26,710 INFO Thread-12 :179745 [upload_job.py:push():137] Uploaded file /tmp/tmpwan7wg8uwandb/2ncxbc2w-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:28,580 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:30,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:32,541 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:32,541 DEBUG SenderThread:179745 [sender.py:send():234] send: telemetry +2022-07-29 19:06:32,542 DEBUG SenderThread:179745 [sender.py:send():234] send: exit +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():374] send defer +2022-07-29 19:06:32,543 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:32,543 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,543 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:06:32,544 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,544 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:06:32,544 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:06:32,544 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,544 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:06:32,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:32,582 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:32,624 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,624 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:06:32,624 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:06:32,624 DEBUG SenderThread:179745 [sender.py:send():234] send: stats +2022-07-29 19:06:32,624 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,624 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:06:32,625 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,625 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send():234] send: summary +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:06:32,626 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:06:32,626 DEBUG HandlerThread:179745 
[handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,626 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:06:32,626 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,626 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:06:32,645 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:32,789 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:06:32,789 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:32,790 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,790 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:06:32,790 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,790 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:06:32,790 INFO SenderThread:179745 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:06:32,891 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,582 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml +2022-07-29 19:06:33,582 INFO SenderThread:179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml config.yaml +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt requirements.txt +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log output.log +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json wandb-summary.json +2022-07-29 19:06:33,584 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:06:33,586 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:33,586 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:06:33,587 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,592 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:33,593 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:06:33,593 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:33,595 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:06:33,596 INFO SenderThread:179745 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:06:33,691 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,691 DEBUG 
SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,792 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,792 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,894 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,894 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,995 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,996 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,059 INFO Thread-15 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:34,091 INFO Thread-14 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt +2022-07-29 19:06:34,097 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,097 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,102 INFO Thread-16 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:34,105 INFO Thread-13 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml +2022-07-29 19:06:34,198 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,199 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,300 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,300 DEBUG 
SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,306 INFO Thread-7 :179745 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:06:34,306 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,306 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:06:34,307 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,307 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:06:34,401 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,490 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:06:34,491 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,491 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,491 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:06:34,491 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,492 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:06:34,492 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:06:34,492 DEBUG SenderThread:179745 [sender.py:send():234] send: final +2022-07-29 19:06:34,492 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,492 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:06:34,492 DEBUG SenderThread:179745 [sender.py:send():234] send: footer +2022-07-29 19:06:34,493 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,493 INFO SenderThread:179745 
[sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:06:34,592 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,592 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,592 INFO SenderThread:179745 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:06:34,848 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:06:34,849 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:06:34,850 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:06:34,850 INFO HandlerThread:179745 [handler.py:finish():731] shutting down handler +2022-07-29 19:06:35,493 INFO WriterThread:179745 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb +2022-07-29 19:06:35,848 INFO SenderThread:179745 [sender.py:finish():1070] shutting down sender +2022-07-29 19:06:35,848 INFO SenderThread:179745 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:06:35,848 INFO SenderThread:179745 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:06:35,850 INFO MainThread:179745 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190622-1kplw9z9/logs/debug.log b/wandb/run-20220729_190622-1kplw9z9/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0474c8910175ed52486a17ec48155d7bfb62a7ec --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:06:22,538 INFO MainThread:178475 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:06:22,539 INFO MainThread:178475 
[wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/logs/debug.log +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():460] starting backend +2022-07-29 19:06:22,539 INFO MainThread:178475 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:06:22,566 INFO MainThread:178475 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:06:22,591 INFO MainThread:178475 [backend.py:ensure_launched():221] started backend process with pid: 179745 +2022-07-29 19:06:22,593 INFO MainThread:178475 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:06:22,606 INFO MainThread:178475 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:06:22,668 INFO MainThread:178475 [wandb_init.py:init():563] communicating current version +2022-07-29 19:06:23,398 INFO MainThread:178475 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:06:23,398 INFO MainThread:178475 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:06:23,579 INFO MainThread:178475 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:06:26,003 INFO MainThread:178475 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:06:26,004 INFO MainThread:178475 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:06:26,004 INFO MainThread:178475 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:06:26,007 INFO MainThread:178475 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:06:26,007 INFO MainThread:178475 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:06:30,488 INFO MainThread:178475 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:06:30,495 INFO MainThread:178475 [wandb_run.py:_restore():1752] restore +2022-07-29 19:06:32,544 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:06:32,790 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:06:33,590 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85497 +} + +2022-07-29 19:06:33,692 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85497 +} + +2022-07-29 19:06:33,793 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:33,895 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:33,996 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,098 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,199 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,301 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,491 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,848 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} +local_info { +} + +2022-07-29 19:06:36,413 INFO MainThread:178475 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb b/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..e843c03f2638b3c7aa1f54a1fe09fc3c1e740163 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bee9267448bf968d5a1cc3b1820fa16787862010fd2748e71bef4be596da455 +size 9435 diff --git a/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190943-1pf464vg/files/config.yaml b/wandb/run-20220729_190943-1pf464vg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1c52576d03b2c70e0586b2a98360aea76d1ffec --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121783 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190943-1pf464vg/files/output.log b/wandb/run-20220729_190943-1pf464vg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2598417c23468f7489ff8875757267000883d2e9 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/output.log @@ -0,0 +1,178 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-09-39_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 83.43it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 456.55it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + + + + + + + + + + + + + + + + + 6%|███████████████▎ | 16/256 [02:54<43:31, 10.88s/ba] +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) 
+ File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) + File "/data/flax/lib/python3.8/site-packages/torchaudio/io/_compat.py", line 110, in load_audio_fileobj + return _load_audio(s, frame_offset, num_frames, convert, channels_first) + File "/data/flax/lib/python3.8/site-packages/torchaudio/io/_compat.py", line 80, in _load_audio + waveform = s.pop_chunks()[0] +KeyboardInterrupt \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/files/requirements.txt b/wandb/run-20220729_190943-1pf464vg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 
+attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 
+setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json b/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2189214625406d37ba7ca06fbaaf8bed8a8f1f9f --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:09:46.579137", + "startedAt": "2022-07-29T19:09:43.337367", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + 
"--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json b/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4cab2d787f922fafd371e8f23b9d4df2f6d91744 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 183}} \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log b/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3f4244c7b5c2d5f50398d527d5cc08482ee774a7 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log @@ -0,0 +1,195 @@ +2022-07-29 19:09:44,154 INFO MainThread:186780 [internal.py:wandb_internal():87] W&B internal server running at pid: 186780, started at: 2022-07-29 19:09:44.154398 +2022-07-29 
19:09:44,156 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:09:44,156 INFO WriterThread:186780 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb +2022-07-29 19:09:44,157 DEBUG SenderThread:186780 [sender.py:send():234] send: header +2022-07-29 19:09:44,157 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: check_version +2022-07-29 19:09:44,192 DEBUG SenderThread:186780 [sender.py:send():234] send: run +2022-07-29 19:09:44,367 INFO SenderThread:186780 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files +2022-07-29 19:09:44,367 INFO SenderThread:186780 [sender.py:_start_run_threads():804] run started: 1pf464vg with start time 1659121783 +2022-07-29 19:09:44,368 DEBUG SenderThread:186780 [sender.py:send():234] send: summary +2022-07-29 19:09:44,368 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:09:44,369 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:09:45,368 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:09:46,578 DEBUG HandlerThread:186780 [meta.py:__init__():40] meta init +2022-07-29 19:09:46,579 DEBUG HandlerThread:186780 [meta.py:__init__():54] meta init done +2022-07-29 19:09:46,579 DEBUG HandlerThread:186780 [meta.py:probe():214] probe +2022-07-29 19:09:46,580 DEBUG HandlerThread:186780 [meta.py:_setup_git():204] setup git +2022-07-29 19:09:46,609 DEBUG HandlerThread:186780 [meta.py:_setup_git():211] setup git done +2022-07-29 19:09:46,609 DEBUG HandlerThread:186780 [meta.py:_save_code():92] save code +2022-07-29 19:09:46,620 DEBUG HandlerThread:186780 [meta.py:_save_code():113] save code 
done +2022-07-29 19:09:46,620 DEBUG HandlerThread:186780 [meta.py:_save_patches():130] save patches +2022-07-29 19:09:46,675 DEBUG HandlerThread:186780 [meta.py:_save_patches():172] save patches done +2022-07-29 19:09:46,675 DEBUG HandlerThread:186780 [meta.py:_save_pip():58] save pip +2022-07-29 19:09:46,676 DEBUG HandlerThread:186780 [meta.py:_save_pip():72] save pip done +2022-07-29 19:09:46,676 DEBUG HandlerThread:186780 [meta.py:probe():252] probe done +2022-07-29 19:09:46,679 DEBUG SenderThread:186780 [sender.py:send():234] send: files +2022-07-29 19:09:46,679 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:09:46,679 INFO SenderThread:186780 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:09:46,683 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:09:46,684 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:09:47,349 INFO Thread-11 :186780 [upload_job.py:push():137] Uploaded file /tmp/tmp5vmmygpewandb/1fuj1l6o-wandb-metadata.json +2022-07-29 19:09:47,355 INFO Thread-12 :186780 [upload_job.py:push():137] Uploaded file /tmp/tmp5vmmygpewandb/2x75esdp-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:09:47,374 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:09:47,375 INFO Thread-8 :186780 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code +2022-07-29 19:09:49,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:51,376 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:53,376 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:01,850 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:01,851 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:03,380 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:14,648 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:10:15,385 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:17,032 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:17,032 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:25,388 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:32,192 DEBUG 
HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:32,192 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:35,392 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:44,706 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:10:45,395 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:47,351 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:47,351 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:57,399 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:02,519 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:02,519 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:07,403 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:14,764 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:11:17,661 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:17,661 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:19,407 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:29,411 INFO Thread-8 
:186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:32,824 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:32,824 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:39,415 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:44,822 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:11:47,986 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:47,987 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:51,419 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:01,423 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:03,139 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:03,139 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:11,426 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:14,885 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:18,295 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:18,295 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:23,430 INFO Thread-8 :186780 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:33,434 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:33,455 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:33,455 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:43,438 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:44,945 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:45,034 WARNING MainThread:186780 [internal.py:wandb_internal():146] Internal process interrupt: 1 +2022-07-29 19:12:47,439 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:47,542 DEBUG SenderThread:186780 [sender.py:send():234] send: telemetry +2022-07-29 19:12:47,542 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:47,542 DEBUG SenderThread:186780 [sender.py:send():234] send: exit +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:send_exit():366] handling exit code: 255 +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:send_exit():368] handling runtime: 183 +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:send_exit():374] send defer +2022-07-29 19:12:47,544 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:47,544 DEBUG HandlerThread:186780 
[handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,544 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:12:47,544 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:12:47,545 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,545 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:12:47,552 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,552 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:12:47,552 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:12:47,552 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,552 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:12:47,552 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:47,553 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,553 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:12:47,553 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:12:47,553 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,553 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:12:47,553 DEBUG SenderThread:186780 [sender.py:send():234] send: summary +2022-07-29 19:12:47,574 INFO SenderThread:186780 [sender.py:_save_file():939] 
saving file wandb-summary.json with policy end +2022-07-29 19:12:47,575 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:12:47,575 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,575 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:12:47,575 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:12:47,646 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:47,755 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:12:47,755 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:47,756 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,756 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:12:47,756 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,756 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:12:47,756 INFO SenderThread:186780 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:12:47,857 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,440 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:12:48,440 INFO SenderThread:186780 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml +2022-07-29 19:12:48,440 INFO SenderThread:186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:48,440 INFO SenderThread:186780 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml config.yaml +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt requirements.txt +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log output.log +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json wandb-summary.json +2022-07-29 19:12:48,444 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:12:48,446 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:12:48,446 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:12:48,446 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,449 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
19:12:48,449 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:12:48,450 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:48,450 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:12:48,450 INFO SenderThread:186780 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:12:48,548 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,548 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,650 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,650 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,751 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,751 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,852 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,852 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,921 INFO Thread-14 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt +2022-07-29 19:12:48,923 INFO Thread-13 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml +2022-07-29 19:12:48,940 INFO Thread-15 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:48,954 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,954 DEBUG SenderThread:186780 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-29 19:12:48,958 INFO Thread-16 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:12:49,056 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,056 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,157 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,157 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,158 INFO Thread-7 :186780 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:12:49,159 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,159 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:12:49,159 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,159 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:12:49,258 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,632 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:12:49,632 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,632 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,633 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,633 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:12:49,633 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 
9 +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send():234] send: final +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send():234] send: footer +2022-07-29 19:12:49,633 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,633 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:12:49,634 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,634 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:12:49,733 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,734 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,734 INFO SenderThread:186780 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:12:49,995 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:12:49,995 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:12:49,996 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:12:49,996 INFO HandlerThread:186780 [handler.py:finish():731] shutting down handler +2022-07-29 19:12:50,633 INFO WriterThread:186780 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb +2022-07-29 19:12:50,994 INFO SenderThread:186780 [sender.py:finish():1070] shutting down sender +2022-07-29 19:12:50,994 INFO SenderThread:186780 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:12:50,994 INFO SenderThread:186780 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:12:50,996 INFO MainThread:186780 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190943-1pf464vg/logs/debug.log 
b/wandb/run-20220729_190943-1pf464vg/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0e7ab297a504c9471649ea54a9b52ee002aa91d8 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:09:43,338 INFO MainThread:185526 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:09:43,338 INFO MainThread:185526 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/logs/debug.log +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():460] starting backend +2022-07-29 19:09:43,339 INFO MainThread:185526 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:09:43,366 INFO MainThread:185526 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-29 19:09:43,391 INFO MainThread:185526 [backend.py:ensure_launched():221] started backend process with pid: 186780 +2022-07-29 19:09:43,394 INFO MainThread:185526 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:09:43,407 INFO MainThread:185526 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:09:43,471 INFO MainThread:185526 [wandb_init.py:init():563] communicating current version +2022-07-29 19:09:44,191 INFO MainThread:185526 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:09:44,191 INFO MainThread:185526 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:09:44,368 INFO MainThread:185526 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:09:46,683 INFO MainThread:185526 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:09:46,683 INFO MainThread:185526 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:09:46,684 INFO MainThread:185526 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:09:46,686 INFO MainThread:185526 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 19:09:46,686 INFO MainThread:185526 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:12:45,038 INFO MainThread:185526 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 255 +2022-07-29 19:12:45,045 INFO MainThread:185526 [wandb_run.py:_restore():1752] restore +2022-07-29 19:12:47,544 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:12:47,756 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:12:48,447 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85823 +} + +2022-07-29 19:12:48,549 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85823 +} + +2022-07-29 19:12:48,650 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,752 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,853 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,955 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,056 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,158 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,633 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,994 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} +local_info { +} + +2022-07-29 19:12:51,471 INFO MainThread:185526 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb b/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ed25aa0b67243a6cecd2022e2d311e2cad49a440 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745d88cafd1997dcb33256411787bcb772b7f8a0771002838f831409a6c2f091 +size 16769 diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# 
Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is 
not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml b/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac607fa6c98392ba5fb1cd10e705d04b0553eca0 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659122004 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/output.log b/wandb/run-20220729_191324-ovnz8vs0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..257138140629bb147ad6b25f5629f8ed53b1cb2b --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/output.log @@ -0,0 +1,804 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-13-20_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.60it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 469.14it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [46:10<00:00, 10.82s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [13:50<00:00, 10.92s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [06:10<00:00, 10.57s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 253741/253741 [01:39<00:00, 2560.29ex/s] + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75617/75617 [00:28<00:00, 2669.08ex/s] + + + + + + 
+100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33807/33807 [00:12<00:00, 2611.45ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 52/52 [10:01<00:00, 11.57s/ba] + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:19<00:00, 11.34s/ba] + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:13<00:00, 10.51s/ba] + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51034/51034 [00:11<00:00, 4396.10ex/s] +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6839/6839 [00:01<00:00, 4568.23ex/s] + 
+100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6323/6323 [00:01<00:00, 4447.61ex/s] +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpx3at8so3 +Downloading config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.53k/1.53k [00:00<00:00, 989kB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json in cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +creating metadata file for /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + 
"conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp9q1m_ych +Downloading preprocessor_config.json: 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 212/212 [00:00<00:00, 163kB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json in cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +creating metadata file for /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp9ya5l4jm +Downloading tokenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [00:00<00:00, 226kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json in cache at 
/home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +creating metadata file for /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp7d5al8e3 +Downloading vocab.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 289/289 [00:00<00:00, 173kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json in cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +creating metadata file for /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpqbnp6r29 +Downloading added_tokens.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23.0/23.0 [00:00<00:00, 19.3kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json in cache at 
/home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +creating metadata file for /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpjnl6q31b +Downloading special_tokens_map.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.06k/1.06k [00:00<00:00, 689kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json in cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +creating metadata file for /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file 
https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 674, in from_pretrained + resolved_archive_file = cached_path( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 284, in cached_path + output_path = get_from_cache( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 502, in get_from_cache + _raise_for_status(r) + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 411, in _raise_for_status + raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {response.url}") +transformers.utils.hub.EntryNotFoundError: 404 Client Error: Entry Not Found for url: https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/flax_model.msgpack +During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 707, in from_pretrained + resolved_archive_file = cached_path( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 284, in cached_path + output_path = get_from_cache( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 502, in get_from_cache + _raise_for_status(r) + File 
"/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 411, in _raise_for_status + raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {response.url}") +transformers.utils.hub.EntryNotFoundError: 404 Client Error: Entry Not Found for url: https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/flax_model.msgpack.index.json +During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 988, in main + model = FlaxWav2Vec2ForCTC.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 721, in from_pretrained + raise EnvironmentError( +OSError: facebook/wav2vec2-xls-r-1b does not appear to have a file named flax_model.msgpack but there is a file for PyTorch weights. Use `from_pt=True` to load this model from those weights. \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt b/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 
+gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 
+transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0657ff0d4f40d89b7101e7493aea4e29abc549 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:13:28.123640", + "startedAt": "2022-07-29T19:13:24.831402", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + 
"--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0d78a12cd0bb70c6bb4e30b9459aab8fa516bba0 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 4912}} \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log b/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7e9f3cacc1c921e78fdbcfcf9a15781ecf597023 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log @@ -0,0 +1,1476 @@ +2022-07-29 19:13:25,646 INFO MainThread:1749416 [internal.py:wandb_internal():87] W&B internal server running at pid: 1749416, started at: 2022-07-29 19:13:25.646038 +2022-07-29 19:13:25,648 INFO WriterThread:1749416 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb +2022-07-29 19:13:25,648 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:13:25,649 DEBUG SenderThread:1749416 [sender.py:send():234] send: header +2022-07-29 19:13:25,649 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: check_version +2022-07-29 19:13:25,688 DEBUG SenderThread:1749416 
[sender.py:send():234] send: run +2022-07-29 19:13:25,857 INFO SenderThread:1749416 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files +2022-07-29 19:13:25,857 INFO SenderThread:1749416 [sender.py:_start_run_threads():804] run started: ovnz8vs0 with start time 1659122004 +2022-07-29 19:13:25,859 DEBUG SenderThread:1749416 [sender.py:send():234] send: summary +2022-07-29 19:13:25,859 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:13:25,860 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:13:26,862 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:__init__():40] meta init +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:__init__():54] meta init done +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:probe():214] probe +2022-07-29 19:13:28,124 DEBUG HandlerThread:1749416 [meta.py:_setup_git():204] setup git +2022-07-29 19:13:28,155 DEBUG HandlerThread:1749416 [meta.py:_setup_git():211] setup git done +2022-07-29 19:13:28,155 DEBUG HandlerThread:1749416 [meta.py:_save_code():92] save code +2022-07-29 19:13:28,167 DEBUG HandlerThread:1749416 [meta.py:_save_code():113] save code done +2022-07-29 19:13:28,167 DEBUG HandlerThread:1749416 [meta.py:_save_patches():130] save patches +2022-07-29 19:13:28,222 DEBUG HandlerThread:1749416 [meta.py:_save_patches():172] save patches done +2022-07-29 19:13:28,222 DEBUG HandlerThread:1749416 [meta.py:_save_pip():58] save pip +2022-07-29 19:13:28,223 DEBUG HandlerThread:1749416 [meta.py:_save_pip():72] save pip done +2022-07-29 19:13:28,223 DEBUG HandlerThread:1749416 [meta.py:probe():252] probe done +2022-07-29 19:13:28,226 DEBUG 
SenderThread:1749416 [sender.py:send():234] send: files +2022-07-29 19:13:28,226 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:13:28,227 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:13:28,233 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:28,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:13:28,788 INFO Thread-11 :1749416 [upload_job.py:push():137] Uploaded file /tmp/tmpp4joxsprwandb/1xi0foyf-wandb-metadata.json +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code +2022-07-29 19:13:28,904 INFO Thread-12 :1749416 [upload_job.py:push():137] Uploaded file /tmp/tmpp4joxsprwandb/1mmr2akn-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:13:30,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:32,866 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:34,867 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:43,384 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:43,385 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:13:44,872 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:56,197 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:13:56,877 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:58,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:58,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:06,882 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:13,688 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:13,688 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:18,887 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:26,257 DEBUG 
SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:14:28,844 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:28,844 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:28,892 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:38,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:43,988 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:43,988 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:50,901 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:56,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:14:59,129 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:59,130 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:00,905 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:12,910 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:14,595 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:14,596 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:22,914 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:26,384 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:15:29,753 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:29,754 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:34,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:44,900 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:44,900 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:44,923 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:54,927 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:56,447 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:16:00,060 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:00,060 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:07,932 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:15,210 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:15,211 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
19:16:17,936 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:26,512 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:16:27,940 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:30,360 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:30,361 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:39,945 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:45,516 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:45,517 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:49,949 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:56,576 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:17:00,669 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:00,670 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:01,954 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:11,958 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:15,812 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:15,812 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:21,962 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:26,642 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:17:30,960 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:30,961 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:33,967 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:43,971 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:46,746 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:46,747 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:55,976 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:56,712 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:18:01,895 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:01,895 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:05,981 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:15,985 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:17,057 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:17,057 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:26,783 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:18:27,990 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:32,212 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:32,212 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:39,996 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:47,360 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:47,360 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:50,001 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:56,856 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:19:00,005 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:02,508 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:02,508 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
19:19:12,011 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:17,659 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:17,660 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:22,015 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:26,927 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:19:32,806 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:32,806 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:34,020 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:44,024 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:47,956 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:47,957 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:54,028 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:56,999 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:03,105 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:03,105 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status 
+2022-07-29 19:20:06,033 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:16,038 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:18,284 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:18,284 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:26,042 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:27,071 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:33,437 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:33,437 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:39,049 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:48,629 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:48,630 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:49,053 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:57,143 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:59,058 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:03,899 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:03,900 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:11,063 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:19,089 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:19,090 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:21,068 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:27,214 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:21:31,073 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:34,244 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:34,244 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:43,079 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:49,397 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:49,398 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:53,083 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:57,286 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:22:03,088 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:04,549 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:04,549 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:15,094 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:19,717 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:19,718 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:25,099 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:27,355 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:22:34,892 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:34,893 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:35,104 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:47,110 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:50,043 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:50,043 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:57,114 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:57,414 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:23:05,200 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:05,201 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:07,119 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:19,125 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:20,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:20,359 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:27,473 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:23:29,129 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:35,517 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:35,517 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:41,135 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:50,672 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:50,672 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:51,139 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:57,535 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:24:03,144 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:05,857 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:05,858 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:13,148 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:21,017 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:21,018 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:23,153 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:27,594 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:24:35,158 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:36,179 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:36,180 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:45,163 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:51,368 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:24:51,368 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:57,168 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:57,654 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:25:06,514 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:06,514 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:07,172 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:20,178 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:21,675 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:21,676 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:27,714 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:25:30,182 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:36,834 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:36,834 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:40,187 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:51,996 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 19:25:51,996 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:52,192 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:57,774 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:26:02,197 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:07,151 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:07,152 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:12,201 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:22,416 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:22,417 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:24,206 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:27,833 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:26:34,210 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:37,669 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:37,669 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:44,214 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:52,828 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:52,828 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:56,219 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:57,895 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:27:06,224 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:07,986 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:07,986 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:16,228 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:23,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:23,163 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:27,954 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:27:28,233 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:38,237 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:38,326 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:27:38,327 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:50,242 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:53,484 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:53,484 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:58,014 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:28:00,246 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:08,646 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:08,646 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:10,250 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:22,255 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:23,804 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:23,804 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:28,074 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:28:32,259 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:38,953 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:28:38,954 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:42,263 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:54,109 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:54,109 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:54,268 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:58,138 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:06,273 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:09,269 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:09,270 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:16,278 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:24,431 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:24,431 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:26,282 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:28,198 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:39,288 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:39,578 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:39,578 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:49,292 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:54,734 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:54,735 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:58,258 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:59,297 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:09,899 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:09,899 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:11,302 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:21,307 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:25,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:25,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:28,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:30:33,313 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:40,249 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:40,250 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:43,317 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:53,322 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:55,492 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:55,492 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:58,390 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:05,326 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:10,651 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:10,652 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:15,331 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:25,806 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:25,809 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:27,336 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:28,455 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:37,340 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:40,972 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:40,973 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:49,346 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:56,134 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:56,135 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:58,515 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:59,350 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:09,355 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:11,346 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:11,346 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:19,360 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:26,496 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:26,496 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:28,574 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:32:31,365 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:41,370 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:41,638 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:41,638 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:53,375 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:56,811 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:56,811 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:58,634 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:33:03,379 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:11,972 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:11,972 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:13,384 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:25,388 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:27,123 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:27,124 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:28,701 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:33:35,392 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:42,275 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:42,276 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:45,397 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:57,402 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:57,429 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:57,430 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:58,770 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:34:07,406 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:12,600 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:12,600 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:20,413 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:27,756 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:27,756 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:28,841 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:34:30,417 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:42,422 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:42,912 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:42,912 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:52,426 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:58,075 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:58,076 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:58,913 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:35:02,431 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:13,226 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:13,227 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:14,436 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:24,440 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:28,383 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:28,383 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:28,987 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:35:34,444 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:43,542 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:43,542 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:46,449 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:56,453 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:58,714 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:58,715 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:59,057 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:08,459 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:13,979 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:36:13,979 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:18,463 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:28,467 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:29,128 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:29,148 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:29,148 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:40,472 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:44,312 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:44,313 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:50,476 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:59,200 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:59,465 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:59,465 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:00,480 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:12,485 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:14,643 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:14,643 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:22,489 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:29,273 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:37:29,804 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:29,804 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:32,493 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:44,498 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:44,964 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:44,965 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:54,502 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:59,345 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:00,126 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:00,126 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:04,506 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:15,279 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:15,279 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:16,510 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:26,514 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:29,418 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:30,427 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:30,427 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:39,520 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:45,594 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:45,594 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:49,524 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:59,491 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:59,528 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:00,751 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:39:00,751 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:11,533 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:15,913 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:15,914 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:21,537 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:29,562 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:39:31,068 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:31,069 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:33,542 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:43,546 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:46,227 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:46,227 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:55,551 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:59,623 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:40:01,377 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:40:01,377 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:05,556 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:15,560 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:16,535 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:16,536 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:27,565 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:29,684 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:40:31,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:31,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:37,570 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:46,840 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:46,840 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:47,574 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:59,578 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:40:59,743 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:41:02,009 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:02,009 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:09,582 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:17,160 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:17,161 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:21,587 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:29,803 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:41:31,592 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:32,309 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:32,309 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:41,596 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:47,463 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:47,464 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:53,601 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:59,863 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:42:02,621 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:02,622 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:03,606 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:13,611 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:17,782 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:17,782 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:25,616 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:29,922 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:42:32,921 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:32,921 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:35,621 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:47,626 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:48,103 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:48,104 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:57,630 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:59,994 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:43:03,255 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:03,255 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:08,634 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:18,404 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:18,404 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:20,639 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:30,065 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:43:30,643 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:33,560 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:33,561 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:40,647 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:48,717 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:43:48,717 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:52,652 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:00,137 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:44:02,656 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:03,885 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:03,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:12,659 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:19,045 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:19,046 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:24,664 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:30,209 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:44:34,203 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:34,204 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:36,669 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:46,673 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:49,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:49,359 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:56,677 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:00,282 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:45:04,543 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:04,543 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:08,682 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:18,686 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:19,720 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:19,720 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:28,689 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:30,349 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:45:34,878 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:34,878 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:40,694 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:50,032 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:50,032 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:50,698 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:00,410 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:46:02,703 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:05,807 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:05,808 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:12,707 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:20,975 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:20,975 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:22,711 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:30,470 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:46:32,715 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:36,143 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:46:36,144 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:44,720 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:51,295 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:51,296 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:54,724 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:00,530 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:47:06,449 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:06,449 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:06,729 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:16,733 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:21,601 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:21,601 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:26,737 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:30,591 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:47:36,757 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:47:36,758 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:38,742 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:49,746 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:51,906 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:51,907 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:00,651 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:48:01,751 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:07,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:07,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:11,755 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:21,759 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:22,252 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:22,252 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:30,723 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:48:33,763 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:37,402 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:37,403 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:43,767 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:52,561 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:52,562 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:55,772 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:00,798 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:49:05,776 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:07,712 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:07,712 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:15,781 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:22,869 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:22,869 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:27,786 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:49:30,870 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:49:37,790 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:38,023 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:38,024 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:49,795 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:53,232 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:53,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:59,800 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:00,942 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:50:08,381 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:08,381 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:50:11,805 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:21,809 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:23,542 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:23,542 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: 
stop_status +2022-07-29 19:50:31,002 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:50:31,813 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:38,698 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:38,699 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:50:43,817 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:53,822 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:53,852 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:53,852 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:01,062 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:51:05,827 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:08,999 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:09,000 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:15,831 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:24,152 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:24,152 DEBUG SenderThread:1749416 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 19:51:25,835 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:31,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:51:37,840 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:39,307 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:39,308 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:47,845 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:54,454 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:54,455 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:57,849 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:01,185 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:52:09,606 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:09,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:09,854 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:20,858 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:52:24,761 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:24,761 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:30,862 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:31,246 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:52:39,919 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:39,920 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:42,867 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:52,872 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:55,081 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:55,082 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:01,306 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:53:04,877 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:10,224 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:10,224 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:14,882 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:25,392 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:25,393 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:26,888 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:31,366 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:53:36,892 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:40,546 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:40,546 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:46,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:55,695 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:55,695 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:56,901 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:01,426 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:54:08,906 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:10,860 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:10,860 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:20,911 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:26,018 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:26,018 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:30,915 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:31,486 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:54:40,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:41,183 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:41,183 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:52,924 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:56,343 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:56,343 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:01,555 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:55:04,929 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:11,483 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:55:11,483 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:14,933 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:24,937 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:26,633 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:26,634 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:31,614 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:55:36,942 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:41,793 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:41,793 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:46,946 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:56,953 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:56,953 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:58,951 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:03,778 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:56:08,955 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:16,952 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:16,952 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:19,960 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:31,735 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:56:31,965 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:32,110 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:32,110 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:41,970 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:47,269 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:47,269 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:51,974 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:01,798 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:57:02,440 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:02,441 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:03,980 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:13,984 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:17,596 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:17,596 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:25,989 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:31,858 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:57:32,762 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:32,762 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:35,994 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:45,998 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:47,947 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:47,948 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:58,003 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:01,918 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:58:03,105 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:58:03,105 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:08,007 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:18,273 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:18,274 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:20,013 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:30,017 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:31,978 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:58:33,439 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:33,439 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:42,022 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:48,589 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:48,589 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:52,026 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:02,039 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:59:03,757 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:59:03,757 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:04,031 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:14,036 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:18,918 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:18,919 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:24,040 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:32,099 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:59:34,080 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:34,080 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:36,045 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:44,049 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:49,232 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:49,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:56,054 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 20:00:02,158 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:00:04,382 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:04,382 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:08,059 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:18,063 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:19,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:19,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:30,068 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:32,218 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:00:34,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:34,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:40,072 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:49,856 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:49,856 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:50,077 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:02,083 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:02,278 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:01:05,061 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:05,062 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:15,088 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:20,256 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:20,256 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:25,093 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:32,338 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:01:35,097 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:35,407 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:35,408 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:47,102 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:50,566 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:50,566 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:57,106 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:02,398 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:02:05,736 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:05,736 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:07,110 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:19,115 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:20,913 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:20,914 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:29,119 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:32,458 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:02:36,088 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:36,088 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:41,124 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:51,128 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:51,251 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:51,251 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:01,132 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:02,517 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:03:06,409 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:06,409 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:13,137 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:21,569 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:21,570 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:23,140 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:32,578 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:03:35,145 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:36,729 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:36,729 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:45,150 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:51,906 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:51,906 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:57,155 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:02,638 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:04:07,048 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:07,048 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:07,159 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:17,164 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:22,202 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:22,203 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:29,169 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:32,698 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:04:37,371 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:37,372 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:39,174 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:51,179 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:52,527 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:52,527 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:01,183 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:02,758 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:05:07,690 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:07,691 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:13,188 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:22,872 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:22,873 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:23,193 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:32,818 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:05:35,198 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:38,026 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 20:05:38,027 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:46,203 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:53,190 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:53,191 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:56,207 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:02,881 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:06:08,212 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:08,353 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:08,354 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:18,217 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:23,529 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:23,529 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:30,222 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:32,954 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:06:38,680 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 20:06:38,680 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:40,226 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:52,232 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:53,842 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:53,842 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:02,237 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:03,014 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:07:09,014 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:09,015 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:14,242 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:24,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:24,164 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:24,246 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:33,074 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:07:34,250 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:39,326 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:39,327 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:46,255 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:54,479 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:54,480 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:56,259 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:03,134 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:08:08,264 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:09,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:09,625 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:18,268 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:24,793 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:24,793 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:28,273 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:33,193 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:08:39,945 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:39,946 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:40,278 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:50,282 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:55,123 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:55,123 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:02,287 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:03,254 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:09:10,277 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:10,278 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:12,291 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:24,296 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:25,427 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:25,427 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:33,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:09:34,301 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:40,574 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:40,575 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:44,305 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:55,728 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:55,729 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:56,310 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:03,388 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:10:06,314 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:10,884 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:10,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:19,319 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:26,049 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
20:10:26,050 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:29,324 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:33,454 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:10:41,202 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:41,202 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:41,329 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:51,333 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:56,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:56,360 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:03,338 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:03,514 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:11:11,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:11,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:13,342 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:25,347 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:26,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:26,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:33,574 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:11:35,352 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:41,848 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:41,848 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:45,356 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:56,996 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:56,997 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:57,362 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:03,634 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:12:07,366 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:12,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:12,163 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:19,371 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:27,312 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:27,312 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:29,376 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:33,693 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:12:41,381 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:42,462 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:42,462 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:51,386 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:57,618 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:57,618 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:03,391 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:03,754 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:13:12,777 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:12,777 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:13,395 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:23,399 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:27,938 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:27,939 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:33,813 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:13:35,404 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:43,093 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:43,093 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:45,409 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:57,414 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:58,247 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:58,248 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:03,873 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:14:07,418 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:13,400 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 20:14:13,401 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:17,422 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:28,557 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:28,557 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:30,428 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:33,934 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:14:40,432 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:43,731 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:43,731 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:50,436 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:58,884 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:58,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:02,441 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:04,001 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:15:12,445 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:14,035 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:14,035 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:24,451 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:29,203 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:29,203 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:34,062 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:15:34,455 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:44,364 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:44,364 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:46,460 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:56,464 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:59,523 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:59,523 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:04,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:16:06,468 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:14,686 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:14,686 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:18,473 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:28,478 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:29,992 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:29,993 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:34,182 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:16:40,483 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:45,147 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:45,148 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:50,488 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:00,286 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:00,286 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:00,492 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:04,242 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:17:12,496 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:15,450 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:15,451 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:22,501 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:30,605 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:30,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:34,302 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:17:34,506 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:44,510 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:45,772 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:45,773 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:56,515 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:00,928 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:00,929 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:04,363 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:18:06,520 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:16,083 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:16,083 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:16,524 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:28,530 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:31,233 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:31,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:34,423 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:18:38,534 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:46,379 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:46,380 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:51,539 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:01,535 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
20:19:01,536 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:01,544 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:04,481 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:19:11,548 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:16,684 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:16,685 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:23,553 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:31,835 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:31,835 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:33,557 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:34,541 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:19:43,561 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:45,562 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:47,053 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:47,053 DEBUG 
SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:47,563 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:49,564 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:51,565 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:53,566 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:55,567 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:57,568 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:59,569 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:01,569 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:02,258 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:02,258 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:03,570 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:04,612 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:20:05,571 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:07,572 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:09,573 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:11,574 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:13,575 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:15,575 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:17,397 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:17,398 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:17,576 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:19,577 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:21,578 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:23,579 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:25,580 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:27,581 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:29,582 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:31,583 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:32,583 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:32,583 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:33,586 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:34,685 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:20:35,587 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:37,588 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:39,589 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:41,590 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:43,591 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:45,592 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:47,593 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:47,755 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:47,755 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:49,594 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:51,594 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:53,596 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:55,597 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:57,598 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:59,599 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:01,600 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:02,922 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:02,923 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:04,601 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:04,759 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:21:06,602 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:08,603 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:10,604 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:12,605 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:14,606 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:16,607 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:18,080 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 20:21:18,080 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:18,608 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:20,609 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:22,610 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:24,611 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:26,612 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:28,613 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:30,614 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:32,615 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:33,261 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:33,261 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:34,616 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:34,833 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:21:36,617 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:38,618 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:40,619 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:42,619 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:44,620 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:46,621 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:48,428 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:48,428 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:48,622 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:50,623 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:52,624 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:54,625 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:58,627 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:00,628 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:02,629 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:03,563 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:03,563 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:04,630 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:04,909 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:22:06,631 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:08,632 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:10,633 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:12,634 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:18,733 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:18,733 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:24,639 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:33,895 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:33,895 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:34,970 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:22:36,645 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:48,650 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:49,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:49,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:58,654 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:04,213 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:04,213 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:05,030 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:23:10,660 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:19,375 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:19,376 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:22,665 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:34,556 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:34,556 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:34,670 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:35,090 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:23:44,674 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:49,712 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:49,712 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:58,680 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:04,883 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:04,884 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:05,150 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:24:11,685 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:20,037 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:20,038 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:21,689 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:33,694 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:35,210 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:24:35,217 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:35,217 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:45,699 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:50,378 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:50,378 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:57,704 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:05,271 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:25:05,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:05,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:07,709 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:19,714 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:20,689 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:20,690 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:31,719 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:35,331 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:25:35,850 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:35,850 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:43,724 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:50,998 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:50,998 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:55,729 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:05,394 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:26:06,161 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:06,162 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:07,734 INFO Thread-8 
:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:19,739 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:21,310 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:21,311 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:31,744 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:35,454 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:26:36,478 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:36,479 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:43,749 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:51,624 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:51,624 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:55,754 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:05,514 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:27:07,015 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:07,015 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:07,759 INFO 
Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:19,764 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:22,246 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:22,246 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:31,769 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:35,576 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:27:37,409 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:37,409 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:41,773 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:52,578 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:52,578 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:53,778 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:05,637 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:28:05,783 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:07,725 DEBUG HandlerThread:1749416 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:07,725 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:17,788 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:22,867 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:22,867 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:27,792 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:35,698 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:28:38,034 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:38,034 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:39,797 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:52,803 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:53,182 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:53,182 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:02,807 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:05,758 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:29:08,363 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:08,364 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:14,812 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:23,507 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:23,508 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:26,817 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:35,818 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:29:38,664 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:38,664 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:38,822 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:48,826 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:53,809 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:53,809 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:00,832 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:05,878 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:30:08,975 
DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:08,975 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:12,837 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:24,131 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:24,131 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:24,843 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:35,939 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:30:36,848 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:39,304 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:39,305 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:48,853 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:54,448 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:54,448 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:00,858 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:05,999 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 
20:31:09,602 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:09,603 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:12,864 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:24,761 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:24,761 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:24,869 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:34,873 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:36,058 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:31:39,922 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:39,922 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:46,878 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:55,103 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:55,103 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:58,884 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:06,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats 
+2022-07-29 20:32:10,271 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:10,271 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:10,889 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:14,891 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:25,423 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:25,423 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:26,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:36,181 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:32:36,900 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:40,574 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:40,575 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:48,905 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:55,730 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:55,730 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:00,910 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:06,242 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:33:10,882 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:10,882 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:12,915 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:22,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:26,031 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:26,032 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:33,924 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:36,302 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:33:41,181 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:41,181 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:45,929 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:56,333 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:56,333 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:57,934 INFO Thread-8 
:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:06,362 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:34:09,940 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:11,532 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:11,532 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:19,944 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:26,679 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:26,680 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:31,949 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:36,423 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:34:41,850 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:41,850 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:43,954 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:47,956 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:49,957 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:51,958 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:53,959 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:55,960 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:56,988 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:56,988 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:57,961 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:59,962 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:01,963 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:03,964 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:05,965 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:06,494 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:35:11,968 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:12,137 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:35:12,137 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:35:13,968 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:15,969 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:17,970 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:18,373 DEBUG SenderThread:1749416 [sender.py:send():234] send: telemetry +2022-07-29 20:35:18,373 DEBUG SenderThread:1749416 [sender.py:send():234] send: exit +2022-07-29 20:35:18,373 INFO SenderThread:1749416 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 20:35:18,375 INFO SenderThread:1749416 [sender.py:send_exit():368] handling runtime: 4912 +2022-07-29 20:35:18,376 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,376 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 20:35:18,377 INFO SenderThread:1749416 [sender.py:send_exit():374] send defer +2022-07-29 20:35:18,377 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,378 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,378 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 20:35:18,378 DEBUG 
SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,378 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 20:35:18,378 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 1 +2022-07-29 20:35:18,379 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,379 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 20:35:18,424 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,424 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 20:35:18,424 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 2 +2022-07-29 20:35:18,424 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:35:18,424 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,424 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 20:35:18,425 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,425 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 20:35:18,425 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 3 +2022-07-29 20:35:18,425 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,425 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 20:35:18,425 DEBUG SenderThread:1749416 [sender.py:send():234] send: summary +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 20:35:18,426 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,426 INFO 
SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 4 +2022-07-29 20:35:18,426 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,426 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 20:35:18,426 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 20:35:18,479 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,605 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 5 +2022-07-29 20:35:18,605 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,606 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,606 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 20:35:18,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,606 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 20:35:18,606 INFO SenderThread:1749416 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 20:35:18,707 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,971 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml +2022-07-29 20:35:18,971 INFO SenderThread:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:18,971 INFO 
SenderThread:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml config.yaml +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt requirements.txt +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log output.log +2022-07-29 20:35:18,973 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json wandb-summary.json +2022-07-29 20:35:18,973 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json wandb-metadata.json +2022-07-29 20:35:18,976 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 20:35:18,976 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 6 +2022-07-29 20:35:18,976 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,982 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,982 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 20:35:18,982 DEBUG SenderThread:1749416 
[sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,982 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 20:35:18,982 INFO SenderThread:1749416 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 20:35:19,081 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,081 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,182 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,183 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,284 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,284 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,386 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,386 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,464 INFO Thread-14 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt +2022-07-29 20:35:19,466 INFO Thread-15 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:19,487 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,488 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,543 INFO Thread-13 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml +2022-07-29 20:35:19,584 INFO Thread-16 :1749416 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 20:35:19,589 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,589 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,691 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,691 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,784 INFO Thread-7 :1749416 [sender.py:transition_state():387] send defer: 7 +2022-07-29 20:35:19,785 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:19,785 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 20:35:19,785 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:19,785 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 20:35:19,792 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:20,263 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 8 +2022-07-29 20:35:20,263 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:20,264 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:20,264 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 20:35:20,264 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:20,264 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 20:35:20,264 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 9 +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send():234] send: final 
+2022-07-29 20:35:20,265 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:20,265 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send():234] send: footer +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:20,265 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 20:35:20,365 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:20,365 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:20,366 INFO SenderThread:1749416 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 20:35:20,624 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 20:35:20,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 20:35:20,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 20:35:20,625 INFO HandlerThread:1749416 [handler.py:finish():731] shutting down handler +2022-07-29 20:35:21,266 INFO WriterThread:1749416 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [sender.py:finish():1070] shutting down sender +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 20:35:21,626 INFO MainThread:1749416 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log b/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log new file mode 
100644 index 0000000000000000000000000000000000000000..ec544f004b267b794570dff767a374deb6742558 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:13:24,832 INFO MainThread:1748149 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():460] starting backend +2022-07-29 19:13:24,833 INFO MainThread:1748149 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:13:24,861 INFO MainThread:1748149 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:13:24,887 INFO MainThread:1748149 [backend.py:ensure_launched():221] started backend process with pid: 1749416 +2022-07-29 19:13:24,890 INFO MainThread:1748149 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:13:24,904 INFO MainThread:1748149 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:13:24,968 INFO MainThread:1748149 [wandb_init.py:init():563] communicating current version +2022-07-29 19:13:25,686 INFO MainThread:1748149 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:13:25,687 INFO MainThread:1748149 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:13:25,860 INFO MainThread:1748149 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:13:28,230 INFO MainThread:1748149 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:13:28,231 INFO MainThread:1748149 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:13:28,231 INFO MainThread:1748149 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:13:28,233 INFO MainThread:1748149 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:13:28,234 INFO MainThread:1748149 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 20:35:15,919 INFO MainThread:1748149 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 20:35:15,924 INFO MainThread:1748149 [wandb_run.py:_restore():1752] restore +2022-07-29 20:35:18,378 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 73703 +} + +2022-07-29 20:35:18,606 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 73703 +} + +2022-07-29 20:35:18,980 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 106250 +} + +2022-07-29 20:35:19,081 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 106250 +} + +2022-07-29 20:35:19,183 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106221 + total_bytes: 106250 +} + +2022-07-29 20:35:19,285 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,387 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,488 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,590 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,692 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:20,264 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:20,623 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} +local_info { +} + +2022-07-29 20:35:22,185 INFO MainThread:1748149 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb b/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..271c3bcc64380e651b07d8ebb9d19754b8ffe51f --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a1f120fdad4cd704fa8fa782f094f951375748df0a0368ade5826d6e49d2a1 +size 351403 diff --git a/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..125deac0a5e7358200646f43d7adfd25d2bbcc3e --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1597 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_213705-23c375az/files/config.yaml b/wandb/run-20220729_213705-23c375az/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c1e2e3414ada42d2d59f6f5ff278f2eb4e6c8f5 --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659130625 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_213705-23c375az/files/output.log b/wandb/run-20220729_213705-23c375az/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..321e4f8373aff394413cdc761e1b615793b921cd --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/output.log @@ -0,0 +1,891 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_21-37-01_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.71it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 468.32it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a479b8802b3f5567.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b93c3063e1e3e193.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-153fb89ac3bac9ae.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2f20a68a38894f5b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-92f96a9049447122.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c1954d791f874a7f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c645a2a40522c3f0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24cfb6035bbadcbb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-09d11f511da96fdf.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8c47b02ac891d8ec.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-aac9470589015a7c.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-474509ecbe190df5.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + 
"num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file 
https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpcl_om3tp + + + + + + + + + + + + + + + + + + + + + + + +Downloading pytorch_model.bin: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.60G/3.60G [00:47<00:00, 82.1MB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin in cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +creating metadata file for /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. 
+Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_hid', 'kernel'), ('quantizer', 'codevectors'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7733.71ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8335.60ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7811.68ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8176.30ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8055.27ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7889.70ex/s] +removing punctuation from train split #2: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 6976.44ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7969.58ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7694.00ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7986.11ex/s] +removing punctuation from train split #7: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8234/9523 [00:01<00:00, 6491.17ex/s] +removing punctuation from train split #6: 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9284/9523 [00:01<00:00, 7224.41ex/s] +removing punctuation from train split #7: 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9138/9523 [00:01<00:00, 7105.42ex/s] +removing punctuation from train split #8: 
97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9233/9523 [00:01<00:00, 7266.53ex/s] +removing punctuation from train split #10: 85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8060/9523 [00:01<00:00, 7184.65ex/s] +removing punctuation from train split #15: 54%|██████████████████████████████████████████████████████████████████████████████████████████████████████████ | 5153/9522 [00:00<00:00, 5635.98ex/s] +removing punctuation from train split #11: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8124/9523 [00:01<00:00, 7121.05ex/s] +removing punctuation from train split #10: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8916/9523 [00:01<00:00, 7542.07ex/s] +removing punctuation from train split #9: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8750/9523 [00:01<00:00, 6412.77ex/s] +removing punctuation from train split #12: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8283/9522 [00:01<00:00, 7452.15ex/s] +removing punctuation from train split #15: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6983/9522 [00:00<00:00, 7157.87ex/s] +removing punctuation from 
train split #16: 65%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6219/9522 [00:00<00:00, 7162.74ex/s] +removing punctuation from train split #17: 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6237/9522 [00:00<00:00, 7167.32ex/s] +removing punctuation from train split #13: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7210/9522 [00:01<00:00, 6207.16ex/s] +removing punctuation from train split #20: 47%|███████████████████████████████████████████████████████████████████████████████████████████▊ | 4458/9522 [00:00<00:00, 6375.22ex/s] +removing punctuation from train split #15: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7894/9522 [00:01<00:00, 7677.64ex/s] +removing punctuation from train split #16: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7162/9522 [00:00<00:00, 7771.07ex/s] +removing punctuation from train split #17: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7179/9522 [00:00<00:00, 7769.81ex/s] +removing punctuation from train split #25: 0%| | 0/9522 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
        return cls(
            step=0,
            apply_fn=apply_fn,
            params=params,
            tx=tx,
            opt_state=opt_state,
            **kwargs,
        )

    def replicate(self):
        # Replicate the state across local devices; each device receives its own dropout PRNG key.
        return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))


@flax.struct.dataclass
class FlaxDataCollatorSpeechSeq2SeqWithPadding:
    """
    Data collator that will dynamically pad the inputs received.
    Args:
        processor ([`Wav2Vec2Processor`])
            The processor used for processing the data (feature extractor for the audio
            inputs, tokenizer for the labels).
        input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"longest"`):
            Select a strategy to pad the returned input sequences (according to the model's padding side and padding index)
            among:
            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence if provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'`: No padding (i.e., can output a batch with sequences of
              different lengths).
        label_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"max_length"`):
            Select a strategy to pad the returned label sequences (according to the tokenizer's padding side and
            padding index). See above for details.
        max_input_length (:obj:`float`, `optional`):
            Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
        max_label_length (:obj:`float`, `optional`):
            Maximum length of the ``labels`` of the returned list and optionally padding length (see above).
        pad_input_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the input sequence to a multiple of the provided value.
            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
            7.5 (Volta).
        pad_to_multiple_of_label (:obj:`int`, `optional`):
            If set will pad the label sequence to a multiple of the provided value.
            See above for details.
    """

    processor: Any
    input_padding: Union[bool, str] = "longest"
    label_padding: Union[bool, str] = "max_length"
    pad_input_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_label: Optional[int] = None
    max_input_length: Optional[float] = None
    max_label_length: Optional[float] = None

    def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        # reformat list to dict and set to numpy format (return_tensors="np")
        batch = self.processor.feature_extractor.pad(
            input_features,
            max_length=self.max_input_length,
            padding=self.input_padding,
            pad_to_multiple_of=self.pad_input_to_multiple_of,
            return_tensors="np",
        )

        labels_batch = self.processor.tokenizer.pad(
            label_features,
            max_length=self.max_label_length,
            padding=self.label_padding,
            pad_to_multiple_of=self.pad_to_multiple_of_label,
            return_tensors="np",
        )

        # mask out padded label positions with -100 so they are ignored downstream
        # (ctc_loss treats negative label ids as padding)
        labels = labels_batch["input_ids"]
        labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1))
        labels = labels.filled(fill_value=-100)

        batch["labels"] = labels

        return batch


def get_grouped_indices(
    dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
) -> np.array:
    """
    Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)
    Function that returns a list of
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar
    lengths. To do this, the indices are:

    - randomly permuted (if a JAX rng is specified)
    - grouped in mega-batches of size `mega_batch_mult * batch_size`
    - sorted by length in each mega-batch

    The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of
    maximum length placed first, so that an OOM happens sooner rather than later.
    """
    lengths = dataset["input_length"]

    # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller.
    if mega_batch_mult is None:
        mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
        # Just in case, for tiny datasets
        if mega_batch_mult == 0:
            mega_batch_mult = 1

    # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler.
    num_samples = len(lengths)
    indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples)

    megabatch_size = mega_batch_mult * batch_size
    megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
    megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches]

    # The rest is to get the biggest batch first.
    # Since each megabatch is sorted by descending length, the longest element is the first
    megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
    max_idx = np.argmax(megabatch_maximums).item()
    # Switch to put the longest batch in first position
    # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
    megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]

    # flatten the list of megabatches into a single 1-D array of indices
    megabatches = np.array([i for megabatch in megabatches for i in megabatch])

    return megabatches


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
    the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
    num_samples = len(samples_idx)
    if drop_last:
        # drop the tail that does not fill a complete batch, then reshape to (num_batches, batch_size)
        samples_to_remove = num_samples % batch_size
        if samples_to_remove != 0:
            samples_idx = samples_idx[:-samples_to_remove]
        sections_split = num_samples // batch_size
        samples_idx = samples_idx.reshape((sections_split, batch_size))
    else:
        # keep the last (possibly smaller) batch: np.array_split returns a list of arrays
        sections_split = math.ceil(num_samples / batch_size)
        samples_idx = np.array_split(samples_idx, sections_split)
    return samples_idx


def write_train_metric(summary_writer, train_metrics, train_time, step):
    # Log cumulative training time plus every accumulated per-step metric to TensorBoard.
    summary_writer.scalar("train_time", train_time, step)

    train_metrics = get_metrics(train_metrics)
    for key, vals in train_metrics.items():
        tag = f"train_{key}"
        for i, val in enumerate(vals):
            # back-fill one scalar per accumulated step so the x-axis stays in step units
            summary_writer.scalar(tag, val, step - len(vals) + i + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    # Log each eval metric to TensorBoard under an `eval_` prefix.
    for metric_name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{metric_name}", value, step)

    if pred_str is not None:
        # write output actual predictions for debugging
        summary_writer.text("eval_predictions",
"\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a dict of scalar metrics to Weights & Biases (main process only)."""
    if jax.process_index() == 0:
        log_metrics = {}
        for k, v in metrics.items():
            if "layer" in k:
                # trailing slash groups all per-layer metrics into their own wandb panel section
                log_metrics[f"{k}/"] = v
            elif prefix is not None:
                log_metrics[f"{prefix}/{k}"] = v
            else:
                log_metrics[k] = v
        wandb.log(log_metrics, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) string pairs as a wandb table (main process only)."""
    if jax.process_index() == 0:
        # convert str data to a wandb compatible format
        str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
        # we'll log the first 50 predictions for each epoch
        wandb.log(
            {
                f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                    columns=["label_str", "pred_str"], data=str_data[:num_log]
                )
            },
            step,
        )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Returns a linear warmup, linear_decay learning rate function."""
    warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay_fn = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
    return schedule_fn


def ctc_loss(
    logits,
    logits_attention_mask,
    labels,
    blank_id,
    loss_reduction="mean",
    output_emission_dict=False,
    log_epsilon=-100000.0,
):
    """Computes CTC loss.
    This function performs forward computation over an FSA with `N * 2` states
    where `N` is the max number of labels. The states are split into two groups:
    Phi states and emission states. a phi-state accepts repetition of
    phi (blank)-symbols and transits to emission state when the correct label is
    observed. An emission state accepts repetition of the label and transits to
    the next phi states at any time (so called epsilon-transition).
    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
    and `N` denotes the time steps in `labels`.
    Args:
        logits: (B, T, K)-array containing log-probabilities of each class.
        logits_attention_mask: (B, T)-array. Attention mask for `logits`; positions
            where the mask is 0 are treated as padding.
        labels: (B, N)-array containing reference integer labels. `labels` must be
            right-padded with -100, i.e. each row is a run of label ids followed by
            a (possibly empty) run of -100 entries.
        blank_id: Id for blank token.
        loss_reduction: one of "mean", "sum", "none"
            - "none": no reduction is applied.
            - "mean": output loss will be divided by target lengths and then the
              mean over the batch is taken.
            - "sum": output loss are summed over batch
        output_emission_dict: whether to output additional information about the emission probs
    Returns:
        The reduced loss when `output_emission_dict` is False; otherwise a pair
        `(loss, aux)`.
        aux: Dictionary containing interim variables used for computing losses.
        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
            phi-state corresponding to the n-th label.
        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
            emission-state corresponding to the n-th label.
        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
            corresponding to each time frame.
        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
            corresponding to each time frame.
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_215007-14bu0ptz/files/config.yaml b/wandb/run-20220729_215007-14bu0ptz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15f01565667c96392530efd33dfc36216286b866 --- /dev/null +++ b/wandb/run-20220729_215007-14bu0ptz/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659131407 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_215007-14bu0ptz/files/output.log b/wandb/run-20220729_215007-14bu0ptz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e7038d36ff2897c0eec18bc8aa2a6161bd6a87f0 --- /dev/null +++ b/wandb/run-20220729_215007-14bu0ptz/files/output.log @@ -0,0 +1,1045 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_21-50-02_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 80.41it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 464.19it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +filtering NST #0: 46%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3686/7930 [00:01<00:01, 2403.10ex/s] +filtering NST #1: 39%|██████████████████████████████████████████████████████████████████████████████████████ | 3086/7930 [00:01<00:02, 2093.09ex/s] +filtering NST #2: 36%|███████████████████████████████████████████████████████████████████████████████ | 2836/7930 [00:01<00:02, 2291.19ex/s] +filtering NST #3: 28%|█████████████████████████████████████████████████████████████▋ | 2214/7930 [00:01<00:04, 1285.27ex/s] +filtering NST #4: 27%|████████████████████████████████████████████████████████████▏ | 2161/7930 [00:01<00:05, 1145.05ex/s] +filtering NST #5: 28%|█████████████████████████████████████████████████████████████▏ | 2194/7930 [00:01<00:04, 1166.55ex/s] +filtering NST #6: 28%|█████████████████████████████████████████████████████████████ | 2190/7930 [00:01<00:04, 1205.53ex/s] +filtering NST #7: 41%|███████████████████████████████████████████████████████████████████████████████████████████▍ | 3282/7930 [00:01<00:02, 1928.98ex/s] +filtering NST #8: 50%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3997/7930 [00:01<00:01, 2536.44ex/s] +filtering NST #9: 48%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3811/7930 [00:01<00:01, 2422.65ex/s] +filtering NST #10: 33%|████████████████████████████████████████████████████████████████████████▉ | 2628/7930 [00:01<00:02, 2183.23ex/s] 
+filtering NST #11: 27%|██████████████████████████████████████████████████████████▉ | 2124/7930 [00:01<00:05, 1116.97ex/s] +filtering NST #12: 25%|██████████████████████████████████████████████████████▊ | 1974/7930 [00:01<00:04, 1323.00ex/s] +filtering NST #13: 25%|██████████████████████████████████████████████████████▉ | 1979/7929 [00:01<00:04, 1318.92ex/s] +filtering NST #14: 41%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 3240/7929 [00:01<00:02, 2041.53ex/s] +filtering NST #15: 25%|███████████████████████████████████████████████████████▏ | 1989/7929 [00:01<00:04, 1328.70ex/s] +filtering NST #16: 46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3656/7929 [00:01<00:01, 2319.68ex/s] +filtering NST #17: 45%|███████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3573/7929 [00:01<00:01, 2290.56ex/s] +filtering NST #18: 26%|█████████████████████████████████████████████████████████▍ | 2072/7929 [00:01<00:05, 1108.22ex/s] +filtering NST #19: 46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3674/7929 [00:01<00:01, 2345.95ex/s] +filtering NST #20: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 3707/7929 [00:01<00:01, 2384.67ex/s] +filtering NST #21: 46%|████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3639/7929 [00:01<00:01, 2341.27ex/s] +filtering NST #22: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3726/7929 [00:01<00:01, 2398.60ex/s] +filtering NST #23: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3729/7929 [00:01<00:01, 2405.17ex/s] +filtering NST #24: 
46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3675/7929 [00:01<00:01, 2397.96ex/s] +filtering NST #25: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3718/7929 [00:01<00:01, 2352.52ex/s] +filtering NST #26: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3703/7929 [00:01<00:01, 2381.59ex/s] +filtering NST #27: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3703/7929 [00:01<00:01, 2389.93ex/s] +filtering NST #28: 45%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3558/7929 [00:01<00:01, 2295.16ex/s] +filtering NST #29: 25%|███████████████████████████████████████████████████████▏ | 1988/7929 [00:01<00:04, 1344.06ex/s] +filtering NST #30: 36%|████████████████████████████████████████████████████████████████████████████████▏ | 2890/7929 [00:01<00:01, 2543.20ex/s] +filtering NST #15: 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 4885/7929 [00:03<00:01, 1922.01ex/s] +filtering NST #21: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7862/7929 [00:03<00:00, 2378.04ex/s] +filtering NST #18: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6078/7929 [00:03<00:00, 2157.66ex/s] +filtering NST #31: 
96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7608/7929 [00:03<00:00, 2428.73ex/s] +filtering NST #18: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6362/7929 [00:03<00:00, 2334.01ex/s] +filtering NST #29: 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5797/7929 [00:03<00:00, 2381.69ex/s] +filtering NST #28: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7718/7929 [00:03<00:00, 2238.73ex/s] +filtering NST #31: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7881/7929 [00:03<00:00, 2510.70ex/s] +filtering NST #29: 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6045/7929 [00:03<00:00, 2019.87ex/s] +filtering NST #30: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7473/7929 [00:03<00:00, 2209.27ex/s] +filtering NST #15: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7929/7929 [00:04<00:00, 1597.96ex/s] +filtering NST #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7929/7929 [00:04<00:00, 1597.96ex/s] +filtering NST #1: 35%|████████████████████████████████████████████████████████████████████████████▊ | 817/2363 [00:00<00:00, 1885.26ex/s] +filtering NST #16: 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 1725/2363 [00:01<00:00, 1527.92ex/s] + @ 0x7f0d9fd50294 976 (unknown)fef75ef99,7f0fef56d0bf&map= ██████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 1725/2363 [00:01<00:00, 1527.92ex/s] +filtering NST #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1057/1057 [00:00<00:00, 2022.10ex/s] +E0729 21:50:39.818813 299646 process_state.cc:774] RAW: Raising signal 15 with default behaviorTERM.███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1057/1057 [00:00<00:00, 2022.10ex/s] +E0729 21:50:39.818920 299617 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.819015 299649 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.819175 299619 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 
21:50:39.819907 299656 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.820575 299623 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.820645 299611 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.826479 299620 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.831846 299653 process_state.cc:774] RAW: Raising signal 15 with default behavior +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +filtering NPSC #17: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1302/1595 [00:00<00:00, 2518.19ex/s] +filtering NPSC #20: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 1287/1595 [00:00<00:00, 3113.00ex/s] +filtering NPSC #21: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 1299/1595 [00:00<00:00, 3124.59ex/s] +filtering NPSC #23: 
53%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 842/1595 [00:00<00:00, 4236.94ex/s] +filtering NPSC #22: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 1280/1595 [00:00<00:00, 3046.53ex/s] +filtering NPSC #23: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1266/1595 [00:00<00:00, 2867.88ex/s] +filtering NPSC #24: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 1276/1595 [00:00<00:00, 2888.52ex/s] +filtering NPSC #27: 44%|████████████████████████████████████████████████████████████████████████████████████████████████▏ | 697/1594 [00:00<00:00, 2239.06ex/s] +filtering NPSC #28: 52%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 821/1594 [00:00<00:00, 4126.90ex/s] +filtering NPSC #29: 53%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 848/1594 [00:00<00:00, 4262.97ex/s] +filtering NPSC #30: 30%|██████████████████████████████████████████████████████████████████▌ | 482/1594 [00:00<00:00, 2335.03ex/s] +filtering NPSC #30: 46%|████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 727/1594 [00:00<00:00, 2384.53ex/s] +filtering NPSC #31: 45%|██████████████████████████████████████████████████████████████████████████████████████████████████▍ | 713/1594 [00:00<00:00, 2377.36ex/s] +filtering NPSC #9: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2863.23ex/s] +filtering NPSC #10: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2942.58ex/s] +filtering NPSC #11: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2948.20ex/s] +filtering NPSC #12: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3140.75ex/s] +filtering NPSC #13: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3122.36ex/s] +filtering NPSC #17: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3777.13ex/s] +filtering NPSC #14: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2914.54ex/s] +filtering NPSC #18: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3956.16ex/s] +filtering NPSC #16: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3201.51ex/s] +filtering NPSC #19: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4077.04ex/s] +filtering NPSC #20: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4035.87ex/s] +filtering NPSC #15: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2494.70ex/s] +filtering NPSC #21: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4031.17ex/s] +filtering NPSC #23: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3869.40ex/s] +filtering NPSC #24: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3657.49ex/s] +filtering NPSC #25: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3978.18ex/s] +filtering NPSC #26: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4042.04ex/s] +filtering NPSC #22: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2868.35ex/s] +filtering NPSC #27: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3951.27ex/s] +filtering NPSC #28: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4010.50ex/s] +filtering NPSC #29: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4083.08ex/s] +filtering NPSC #30: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4069.21ex/s] +filtering NPSC #31: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4154.30ex/s] +filtering NPSC #21: 0%| | 0/214 [00:00 to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'kernel'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 38%|██████████████████████████████████████████████████████████████████████████▋ | 3612/9523 [00:00<00:00, 9005.42ex/s] +removing punctuation from train split #1: 38%|█████████████████████████████████████████████████████████████████████████▉ | 3576/9523 [00:00<00:00, 8729.88ex/s] +removing punctuation from train split #2: 38%|██████████████████████████████████████████████████████████████████████████▋ | 3610/9523 [00:00<00:00, 8635.06ex/s] +removing punctuation from train split #3: 27%|█████████████████████████████████████████████████████▏ | 2573/9523 [00:00<00:00, 8169.76ex/s] +removing punctuation from train split #4: 28%|██████████████████████████████████████████████████████▋ | 2645/9523 [00:00<00:00, 8221.26ex/s] +removing punctuation from train split #5: 18%|███████████████████████████████████▏ | 1702/9523 [00:00<00:00, 8558.55ex/s] +removing punctuation from train split #6: 18%|██████████████████████████████████▊ | 1681/9523 [00:00<00:00, 8425.32ex/s] +removing punctuation from train split #7: 8%|████████████████▊ | 808/9523 [00:00<00:01, 8072.29ex/s] +removing punctuation from train split #8: 8%|████████████████▋ | 805/9523 [00:00<00:01, 8040.58ex/s] +removing punctuation from train split #9: 0%| | 0/9523 [00:00 + main() | 0/3 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + 
+@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
+    and `N` denotes the time steps in `labels`.
+    Args:
+      logits: (B, T, K)-array containing log-probabilities of each class.
+      logits_attention_mask: (B, T)-array. Attention mask for `logits`; the
+        padding indicators for `logits` are the logical inverse of this mask.
+      labels: (B, N)-array containing reference integer labels, with padding
+        positions indicated by the value -100. `labels` must be right-padded,
+        i.e. each row consists of valid labels followed by a run of -100
+        entries.
+      blank_id: Id for blank token.
+      loss_reduction: one of "mean", "sum", "none"
+        - "none": no reduction is applied.
+        - "mean": output loss will be divided by target lengths and then the
+          mean over the batch is taken.
+        - "sum": output losses are summed over the batch
+      output_emission_dict: whether to output additional information about the emission probs
+    Returns:
+      A pair of `(per_seq_loss, aux)`.
+      per_seq_loss:
+        (B,)-array containing loss values for each sequence in the batch.
+      aux: Dictionary containing interim variables used for computing losses.
+        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
+          phi-state corresponding to the n-th label.
+        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
+          emission-state corresponding to the n-th label.
+        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
+          corresponding to each time frame.
+        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
+          corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training
+    rng = jax.random.PRNGKey(training_args.seed)
+    rng, dropout_rng = jax.random.split(rng)
+
+    # Store some constants
+    max_steps = int(training_args.max_steps)
+    gradient_accumulation_steps = int(training_args.gradient_accumulation_steps)
+    train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
+    batch_size_per_update = train_batch_size * gradient_accumulation_steps
+    per_device_eval_batch_size = int(training_args.per_device_eval_batch_size)
+    eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
+    to_dtype = to_bf16 if training_args.mixed_precision else to_fp32
+
+    if training_args.do_train:
+        num_train_samples = len(vectorized_datasets["train"])
+        steps_per_epoch = num_train_samples // batch_size_per_update
+        if max_steps > 0:
+            num_epochs = -(training_args.max_steps // -steps_per_epoch)
+            total_train_steps = max_steps
+        else:
+            num_epochs = int(training_args.num_train_epochs)
+            total_train_steps = steps_per_epoch * num_epochs
+
+        # Create learning rate schedule
+        linear_decay_lr_schedule_fn = create_learning_rate_fn(
+            total_train_steps,
+            training_args.warmup_steps,
+            training_args.learning_rate,
+        )
+
+        # We use Optax's "masking" functionality to not apply weight decay
+        # to bias and LayerNorm scale parameters. decay_mask_fn returns a
+        # mask boolean with the same structure as the parameters.
+        # The mask is True for parameters that should be decayed.
+        # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart.
+        # For FlaxT5, one should correct the layer norm parameter naming
+        # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_224434-36odnm43/files/config.yaml b/wandb/run-20220729_224434-36odnm43/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e0a7760e8ef146c889f2d8c223a91c6a63835e --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659134674 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_224434-36odnm43/files/output.log b/wandb/run-20220729_224434-36odnm43/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7cf6214d8c08a26fe762fe685448259df3a3af47 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/output.log @@ -0,0 +1,903 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_22-44-30_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.13it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 443.72it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('project_q', 'bias'), ('quantizer', 'codevectors'), ('project_hid', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). 
+- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 49%|████████████████████████████████████████████████████████████████████████████████████████████████▍ | 4664/9523 [00:00<00:00, 9466.64ex/s] +removing punctuation from train split #1: 38%|██████████████████████████████████████████████████████████████████████████▎ | 3594/9523 [00:00<00:00, 9256.81ex/s] +removing punctuation from train split #2: 38%|███████████████████████████████████████████████████████████████████████████▏ | 3633/9523 [00:00<00:00, 9358.53ex/s] +removing punctuation from train split #3: 28%|██████████████████████████████████████████████████████▌ | 2635/9523 [00:00<00:00, 8984.44ex/s] +removing punctuation from train split #4: 29%|██████████████████████████████████████████████████████████ | 2806/9523 [00:00<00:00, 9466.14ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8682.42ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8768.42ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8710.81ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8103.19ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8712.19ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8483.91ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8506.00ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8424.56ex/s] +removing punctuation from train split #13: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8425.47ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8593.40ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8417.50ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8261.69ex/s] +removing punctuation from train split #17: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8576.67ex/s] +removing punctuation from train split #18: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8627.76ex/s] +removing punctuation from train split #19: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8564.63ex/s] +removing punctuation from train split #20: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8116.44ex/s] +removing punctuation from train split #21: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8357.09ex/s] +removing punctuation from train split #22: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8664.23ex/s] +removing punctuation from train split #23: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8866.41ex/s] +removing punctuation from train split #24: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8613.35ex/s] +removing punctuation from train split #25: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8916.53ex/s] +removing punctuation from train split #13: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9462/9522 [00:01<00:00, 8535.89ex/s] +removing punctuation from train split #17: 
62%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 5951/9522 [00:00<00:00, 8429.16ex/s] +removing punctuation from train split #15: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8448/9522 [00:01<00:00, 8529.83ex/s] +removing punctuation from train split #17: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6839/9522 [00:00<00:00, 8569.69ex/s] +removing punctuation from train split #15: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9313/9522 [00:01<00:00, 8565.31ex/s] +removing punctuation from train split #16: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8354/9522 [00:01<00:00, 8458.16ex/s] +removing punctuation from train split #16: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9221/9522 [00:01<00:00, 8520.83ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7795/9522 [00:00<00:00, 8700.55ex/s] +removing punctuation from train split #17: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9515/9522 [00:01<00:00, 
8788.97ex/s] +removing punctuation from train split #25: 8%|████████████████▎ | 790/9522 [00:00<00:01, 7895.45ex/s] +removing punctuation from train split #19: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7707/9522 [00:00<00:00, 8685.02ex/s] +removing punctuation from train split #27: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1209, in main + processor = AutoProcessor.from_pretrained(training_args.output_dir) + File "/data/flax/lib/python3.8/site-packages/transformers/models/auto/processing_auto.py", line 243, in from_pretrained + return processor_class.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py", line 144, in from_pretrained + decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 790, in load_from_dir + filenames = cls.parse_directory_contents(filepath) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 765, in parse_directory_contents + raise ValueError( +ValueError: Could not find alphabet file alphabet.json. 
Found ['preprocessor_config.json', 'wandb', 'vocab.json', 'config.json', 'run.sh', 'special_tokens_map.json', 'README.md', 'models', 'added_tokens.json', 'tokenizer_config.json', 'run_flax_speech_recognition_ctc.py'] \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/files/requirements.txt b/wandb/run-20220729_224434-36odnm43/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 
+multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json b/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..53da0a35a953e76358da65e762332594a0ad2ed0 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": 
"Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T22:44:37.985063", + "startedAt": "2022-07-29T22:44:34.625516", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git 
a/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json b/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..3ed4accf330a32a60ea895077e92f1e740703b4b --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 98}} \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log b/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..129d6316553b7783e9beddfbfe4df131c6948157 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log @@ -0,0 +1,191 @@ +2022-07-29 22:44:35,544 INFO MainThread:3538816 [internal.py:wandb_internal():87] W&B internal server running at pid: 3538816, started at: 2022-07-29 22:44:35.543810 +2022-07-29 22:44:35,546 INFO WriterThread:3538816 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb +2022-07-29 22:44:35,546 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 22:44:35,546 DEBUG SenderThread:3538816 [sender.py:send():234] send: header +2022-07-29 22:44:35,547 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: check_version +2022-07-29 22:44:35,585 DEBUG SenderThread:3538816 [sender.py:send():234] send: run +2022-07-29 22:44:35,751 INFO SenderThread:3538816 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files +2022-07-29 22:44:35,751 INFO SenderThread:3538816 [sender.py:_start_run_threads():804] run started: 36odnm43 with start time 1659134674 +2022-07-29 22:44:35,752 DEBUG SenderThread:3538816 [sender.py:send():234] send: summary +2022-07-29 22:44:35,752 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end 
+2022-07-29 22:44:35,753 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 22:44:36,753 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:44:37,984 DEBUG HandlerThread:3538816 [meta.py:__init__():40] meta init +2022-07-29 22:44:37,984 DEBUG HandlerThread:3538816 [meta.py:__init__():54] meta init done +2022-07-29 22:44:37,985 DEBUG HandlerThread:3538816 [meta.py:probe():214] probe +2022-07-29 22:44:37,986 DEBUG HandlerThread:3538816 [meta.py:_setup_git():204] setup git +2022-07-29 22:44:38,023 DEBUG HandlerThread:3538816 [meta.py:_setup_git():211] setup git done +2022-07-29 22:44:38,023 DEBUG HandlerThread:3538816 [meta.py:_save_code():92] save code +2022-07-29 22:44:38,036 DEBUG HandlerThread:3538816 [meta.py:_save_code():113] save code done +2022-07-29 22:44:38,036 DEBUG HandlerThread:3538816 [meta.py:_save_patches():130] save patches +2022-07-29 22:44:38,096 DEBUG HandlerThread:3538816 [meta.py:_save_patches():172] save patches done +2022-07-29 22:44:38,096 DEBUG HandlerThread:3538816 [meta.py:_save_pip():58] save pip +2022-07-29 22:44:38,097 DEBUG HandlerThread:3538816 [meta.py:_save_pip():72] save pip done +2022-07-29 22:44:38,097 DEBUG HandlerThread:3538816 [meta.py:probe():252] probe done +2022-07-29 22:44:38,100 DEBUG SenderThread:3538816 [sender.py:send():234] send: files +2022-07-29 22:44:38,100 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 22:44:38,101 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 22:44:38,107 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:44:38,108 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
22:44:38,589 INFO Thread-11 :3538816 [upload_job.py:push():137] Uploaded file /tmp/tmprqhm2s9uwandb/3nhzs38m-wandb-metadata.json +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code +2022-07-29 22:44:38,906 INFO Thread-12 :3538816 [upload_job.py:push():137] Uploaded file /tmp/tmprqhm2s9uwandb/11rqutlz-code/run_flax_speech_recognition_ctc.py +2022-07-29 22:44:40,758 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:42,759 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:44,759 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:46,761 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:52,763 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:53,261 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:44:53,262 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:44:54,764 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:56,765 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:06,056 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:45:08,396 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:08,397 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:08,771 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:10,772 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:19,777 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:21,778 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:23,534 DEBUG HandlerThread:3538816 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:23,534 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:23,779 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:35,785 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:36,128 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:45:37,787 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:38,817 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:38,818 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:47,792 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:49,793 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:51,794 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:53,796 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:53,986 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:53,986 DEBUG SenderThread:3538816 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 22:45:55,797 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:57,798 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:59,799 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:01,800 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:03,801 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:05,802 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:06,195 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:46:07,803 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:09,156 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:46:09,156 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:46:09,804 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:11,805 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:13,806 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:14,660 DEBUG SenderThread:3538816 [sender.py:send():234] send: telemetry +2022-07-29 22:46:14,660 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:14,660 DEBUG SenderThread:3538816 [sender.py:send():234] send: exit +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():368] handling runtime: 98 +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():374] send defer +2022-07-29 22:46:14,662 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:14,662 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,662 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 22:46:14,663 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,663 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 22:46:14,663 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 1 +2022-07-29 22:46:14,663 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,663 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 22:46:14,715 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,715 INFO SenderThread:3538816 
[sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 22:46:14,715 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 2 +2022-07-29 22:46:14,715 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:46:14,716 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,716 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 22:46:14,716 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,716 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 22:46:14,716 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 3 +2022-07-29 22:46:14,717 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,717 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 22:46:14,717 DEBUG SenderThread:3538816 [sender.py:send():234] send: summary +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:46:14,717 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 4 +2022-07-29 22:46:14,718 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,718 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 22:46:14,718 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,718 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 22:46:14,764 DEBUG 
HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:14,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:14,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:46:14,918 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 5 +2022-07-29 22:46:14,919 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:14,919 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,919 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 22:46:14,919 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,919 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 22:46:14,919 INFO SenderThread:3538816 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 22:46:15,020 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:15,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml config.yaml +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt requirements.txt +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log output.log +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json wandb-summary.json +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json wandb-metadata.json +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 22:46:15,815 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 6 +2022-07-29 22:46:15,815 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:15,819 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:15,819 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 22:46:15,819 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:15,819 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 22:46:15,819 INFO SenderThread:3538816 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 22:46:15,921 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:15,921 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,022 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit 
+2022-07-29 22:46:16,023 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,124 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,125 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,226 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,226 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,281 INFO Thread-14 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt +2022-07-29 22:46:16,285 INFO Thread-13 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml +2022-07-29 22:46:16,304 INFO Thread-16 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:46:16,328 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,328 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,430 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,430 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,484 INFO Thread-15 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:16,531 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,531 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,633 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 22:46:16,633 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,685 INFO Thread-7 :3538816 [sender.py:transition_state():387] send defer: 7 +2022-07-29 22:46:16,686 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:16,687 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 22:46:16,687 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:16,687 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 22:46:16,734 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:17,278 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 8 +2022-07-29 22:46:17,278 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:17,279 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:17,279 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:17,280 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 22:46:17,280 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 9 +2022-07-29 22:46:17,280 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:17,280 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send():234] send: final +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send():234] send: footer +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send_request():248] 
send_request: defer +2022-07-29 22:46:17,281 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 22:46:17,380 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:17,380 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:17,381 INFO SenderThread:3538816 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 22:46:17,641 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 22:46:17,642 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 22:46:17,642 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 22:46:17,642 INFO HandlerThread:3538816 [handler.py:finish():731] shutting down handler +2022-07-29 22:46:18,281 INFO WriterThread:3538816 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [sender.py:finish():1070] shutting down sender +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 22:46:18,643 INFO MainThread:3538816 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_224434-36odnm43/logs/debug.log b/wandb/run-20220729_224434-36odnm43/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7ea0d2be71419b182619e7eaa3b26e2593556cfa --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 22:44:34,627 INFO MainThread:3537462 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/logs/debug.log +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():404] calling init triggers +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():460] starting backend +2022-07-29 22:44:34,627 INFO MainThread:3537462 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 22:44:34,692 INFO MainThread:3537462 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 22:44:34,737 INFO MainThread:3537462 [backend.py:ensure_launched():221] started backend process with pid: 3538816 +2022-07-29 22:44:34,739 INFO MainThread:3537462 [wandb_init.py:init():469] backend started and connected +2022-07-29 22:44:34,754 INFO MainThread:3537462 [wandb_init.py:init():533] updated telemetry +2022-07-29 22:44:34,864 INFO MainThread:3537462 [wandb_init.py:init():563] communicating current version +2022-07-29 22:44:35,583 INFO MainThread:3537462 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 22:44:35,584 INFO MainThread:3537462 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 22:44:35,753 INFO MainThread:3537462 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 22:44:38,104 INFO MainThread:3537462 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 22:44:38,104 INFO MainThread:3537462 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 22:44:38,105 INFO MainThread:3537462 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 22:44:38,107 INFO MainThread:3537462 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 22:44:38,107 INFO MainThread:3537462 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 22:46:12,243 INFO MainThread:3537462 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 22:46:12,247 INFO MainThread:3537462 [wandb_run.py:_restore():1752] restore +2022-07-29 22:46:14,662 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 73946 +} + +2022-07-29 22:46:14,919 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 73946 +} + +2022-07-29 22:46:15,820 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 261315 +} + +2022-07-29 22:46:15,922 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 261315 +} + +2022-07-29 22:46:16,023 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,125 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,227 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,329 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,430 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,532 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,634 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:17,279 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:17,640 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} +local_info { +} + +2022-07-29 22:46:19,154 INFO MainThread:3537462 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb b/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c091db00292617e4879cb0f58fef68db83c1776c --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5097f7267ca776d7a5ef9b9fc09a67bd4d22fd687ab2721c70b19c6e2d2c165 +size 187832 diff --git a/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logits_attention_mask: (B, T)-array. Attention mask for `logits`; the + padding indicators are derived internally as its inverse. + labels: (B, N)-array containing reference integer labels. Padding + positions in `labels` are indicated by the value -100. Currently, + `labels` must be right-padded, i.e. each row must consist of the + real labels followed by the padding values. + blank_id: Id for blank token. + loss_reduction: one of "none", "mean", "sum" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output losses are summed over the batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + The reduced loss, or a pair of `(loss, aux)` when `output_emission_dict` is `True`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_225502-398l7dkj/files/config.yaml b/wandb/run-20220729_225502-398l7dkj/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ac95417d545afa2ab00e2c8ab3be06297e65197 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659135302 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_225502-398l7dkj/files/output.log b/wandb/run-20220729_225502-398l7dkj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..41bd58fdd7196843dddd2ef6e441e979aa032bc0 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/output.log @@ -0,0 +1,1111 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_22-54-58_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.14it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 438.46it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at 
/home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('project_hid', 'bias'), ('quantizer', 'codevectors')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8908.66ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8922.00ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8639.32ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8852.72ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7630.58ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8552.85ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8690.10ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8399.08ex/s] +removing punctuation from train split #6: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8169/9523 [00:00<00:00, 8175.26ex/s] +removing punctuation from train split #5: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8933/9523 [00:01<00:00, 8077.77ex/s] +removing punctuation from train split #9: 35%|█████████████████████████████████████████████████████████████████████▎ | 3351/9523 [00:00<00:00, 8461.01ex/s] +removing punctuation from train split #6: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9025/9523 [00:01<00:00, 8283.45ex/s] +removing punctuation from train split #7: 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6060/9523 [00:00<00:00, 8680.27ex/s] +removing punctuation from train split #9: 44%|███████████████████████████████████████████████████████████████████████████████████████▏ | 4213/9523 [00:00<00:00, 8517.91ex/s] +removing punctuation from train split #7: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 9484/9523 [00:01<00:00, 8220.08ex/s] +removing punctuation from train split #8: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7143/9523 [00:01<00:00, 6487.45ex/s] +removing punctuation from train split #9: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7672/9523 [00:00<00:00, 7900.51ex/s] +removing punctuation from train split #10: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7785/9523 [00:00<00:00, 8104.51ex/s] +removing punctuation from train split #10: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8660/9523 [00:01<00:00, 8291.78ex/s] +removing punctuation from train split #12: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6839/9522 [00:00<00:00, 8631.85ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7703/9522 [00:00<00:00, 8030.92ex/s] +removing punctuation from train split #14: 61%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5823/9522 [00:00<00:00, 8420.75ex/s] +removing punctuation from train split #15: 
53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 5020/9522 [00:00<00:00, 8385.01ex/s] +removing punctuation from train split #15: 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 5977/9522 [00:00<00:00, 8760.16ex/s] +removing punctuation from train split #16: 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5869/9522 [00:00<00:00, 8467.55ex/s] +removing punctuation from train split #18: 34%|██████████████████████████████████████████████████████████████████▌ | 3233/9522 [00:00<00:00, 8188.93ex/s] +removing punctuation from train split #18: 43%|███████████████████████████████████████████████████████████████████████████████████▋ | 4064/9522 [00:00<00:00, 8229.90ex/s] +removing punctuation from train split #20: 26%|██████████████████████████████████████████████████▎ | 2446/9522 [00:00<00:00, 8223.13ex/s] +removing punctuation from train split #20: 35%|███████████████████████████████████████████████████████████████████▋ | 3288/9522 [00:00<00:00, 8298.23ex/s] +removing punctuation from train split #22: 16%|██████████████████████████████▉ | 1500/9522 [00:00<00:01, 7600.24ex/s] +removing punctuation from train split #22: 25%|████████████████████████████████████████████████▏ | 2344/9522 [00:00<00:00, 7978.19ex/s] +removing punctuation from train split #24: 8%|███████████████▊ | 764/9522 [00:00<00:01, 7637.08ex/s] +removing punctuation from train split #25: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1209, in main + processor = AutoProcessor.from_pretrained(training_args.output_dir) + File "/data/flax/lib/python3.8/site-packages/transformers/models/auto/processing_auto.py", line 243, in 
from_pretrained + return processor_class.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py", line 144, in from_pretrained + decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 790, in load_from_dir + filenames = cls.parse_directory_contents(filepath) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 765, in parse_directory_contents + raise ValueError( +ValueError: Could not find alphabet file alphabet.json. Found ['preprocessor_config.json', 'wandb', 'vocab.json', 'config.json', 'run.sh', 'special_tokens_map.json', 'README.md', 'models', 'added_tokens.json', 'tokenizer_config.json', 'run_flax_speech_recognition_ctc.py'] \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/files/requirements.txt b/wandb/run-20220729_225502-398l7dkj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 
+google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 
+traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json b/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..737fedb9c437b13786c2b34e9005b0856ab8efdf --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T22:55:05.876646", + "startedAt": "2022-07-29T22:55:02.544361", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + 
"--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json b/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a4b169504c2c5ccd13d164b180d5f2c4a73be4 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 714}} \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log b/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..af956dbd1c5eca6079c17b04182416e65e9f57fa --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log @@ -0,0 +1,508 @@ +2022-07-29 22:55:03,445 INFO MainThread:3550240 [internal.py:wandb_internal():87] W&B internal server running at pid: 3550240, started at: 2022-07-29 22:55:03.445579 +2022-07-29 22:55:03,447 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 22:55:03,447 INFO WriterThread:3550240 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb +2022-07-29 22:55:03,448 DEBUG SenderThread:3550240 [sender.py:send():234] send: header +2022-07-29 22:55:03,448 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: check_version +2022-07-29 22:55:03,485 DEBUG SenderThread:3550240 [sender.py:send():234] 
send: run +2022-07-29 22:55:03,659 INFO SenderThread:3550240 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files +2022-07-29 22:55:03,659 INFO SenderThread:3550240 [sender.py:_start_run_threads():804] run started: 398l7dkj with start time 1659135302 +2022-07-29 22:55:03,660 DEBUG SenderThread:3550240 [sender.py:send():234] send: summary +2022-07-29 22:55:03,660 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:55:03,661 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 22:55:04,663 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:__init__():40] meta init +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:__init__():54] meta init done +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:probe():214] probe +2022-07-29 22:55:05,877 DEBUG HandlerThread:3550240 [meta.py:_setup_git():204] setup git +2022-07-29 22:55:05,915 DEBUG HandlerThread:3550240 [meta.py:_setup_git():211] setup git done +2022-07-29 22:55:05,915 DEBUG HandlerThread:3550240 [meta.py:_save_code():92] save code +2022-07-29 22:55:05,928 DEBUG HandlerThread:3550240 [meta.py:_save_code():113] save code done +2022-07-29 22:55:05,928 DEBUG HandlerThread:3550240 [meta.py:_save_patches():130] save patches +2022-07-29 22:55:05,987 DEBUG HandlerThread:3550240 [meta.py:_save_patches():172] save patches done +2022-07-29 22:55:05,987 DEBUG HandlerThread:3550240 [meta.py:_save_pip():58] save pip +2022-07-29 22:55:05,988 DEBUG HandlerThread:3550240 [meta.py:_save_pip():72] save pip done +2022-07-29 22:55:05,988 DEBUG HandlerThread:3550240 [meta.py:probe():252] probe done +2022-07-29 22:55:05,991 DEBUG SenderThread:3550240 
[sender.py:send():234] send: files +2022-07-29 22:55:05,991 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 22:55:05,992 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 22:55:05,999 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:05,999 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:06,492 INFO Thread-11 :3550240 [upload_job.py:push():137] Uploaded file /tmp/tmpdst8kfh8wandb/24kwmtby-wandb-metadata.json +2022-07-29 22:55:06,665 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code +2022-07-29 22:55:07,596 INFO Thread-12 :3550240 [upload_job.py:push():137] Uploaded file /tmp/tmpdst8kfh8wandb/2ri72msf-code/run_flax_speech_recognition_ctc.py +2022-07-29 22:55:08,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:55:10,667 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:12,669 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:14,670 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:20,673 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:21,138 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:21,139 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:22,674 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:33,961 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:55:34,680 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:36,296 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:36,296 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:36,681 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:46,686 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:48,687 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:51,457 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:51,457 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:03,693 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:04,030 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:56:06,780 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:06,781 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:21,938 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:21,939 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:34,096 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:56:37,225 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:37,226 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:45,711 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:47,712 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:49,713 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:51,714 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:52,402 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:52,402 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:53,715 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:55,716 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:57,717 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:59,718 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:01,719 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:03,720 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:04,162 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:57:05,721 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:07,594 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 22:57:07,594 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:07,722 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:10,724 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:12,725 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:14,726 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:16,727 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:18,728 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:20,729 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:22,730 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:22,750 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:22,750 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:24,731 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:26,732 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:28,733 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:30,734 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:32,735 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:34,226 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:57:34,736 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:36,737 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:37,900 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:37,901 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:38,738 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:40,740 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:42,741 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:44,742 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:46,743 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:48,744 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:50,745 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:52,746 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:53,042 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:53,042 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:54,747 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:56,748 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:58,749 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:00,750 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:02,751 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:04,302 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:58:04,752 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:06,753 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:08,177 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:08,177 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:08,754 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:10,755 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:12,756 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:14,757 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:16,758 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:18,760 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:20,761 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:22,762 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:23,321 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:23,321 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:24,763 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:26,764 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:28,765 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:30,766 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:32,768 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:34,388 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:58:34,769 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:36,770 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:38,465 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:38,490 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:38,771 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:40,772 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:42,773 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:44,774 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:46,776 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:48,777 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:50,778 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:52,779 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:53,630 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:53,630 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:54,780 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:56,781 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:58,782 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:00,783 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:02,784 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:04,464 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:59:04,785 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:06,786 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:08,769 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:08,769 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:08,787 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:10,788 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:12,789 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:14,790 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:16,791 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:18,793 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:20,794 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:22,795 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:23,933 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:23,934 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:24,796 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:26,797 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:28,798 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:30,799 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:32,800 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:34,535 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:59:34,801 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:36,803 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:38,804 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:39,074 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:39,075 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:40,806 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:42,807 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:44,808 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:46,809 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:48,810 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:50,811 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:53,812 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:54,245 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:54,245 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:55,813 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:57,814 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:59,815 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:01,816 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:03,817 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:04,620 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:00:05,818 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:07,819 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:09,382 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:09,383 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:09,821 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:11,822 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:13,823 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:15,824 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:17,828 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:19,829 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:21,830 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:23,831 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:24,522 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:24,523 DEBUG SenderThread:3550240 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 23:00:25,832 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:27,833 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:29,834 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:31,836 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:33,837 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:34,711 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:00:35,838 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:37,839 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:39,665 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:39,665 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:39,840 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:41,841 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:43,842 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:45,843 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:47,844 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:49,845 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:51,846 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:53,848 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:54,828 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:54,828 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:55,849 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:57,850 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:59,851 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:01,852 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:03,853 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:04,791 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:01:05,854 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:07,855 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:09,856 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:09,977 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:09,977 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:11,857 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:13,858 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:15,859 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:17,860 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:19,861 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:21,862 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:23,863 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:25,185 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:25,186 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:25,864 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:27,870 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:29,866 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:31,866 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:33,867 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:34,866 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:01:35,868 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:37,869 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:39,870 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:40,810 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:40,811 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:41,871 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:43,872 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:45,874 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:47,875 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:49,878 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:51,879 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:53,880 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:55,881 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:55,948 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:55,948 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:57,883 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:59,884 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:01,885 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:03,886 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:04,951 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:02:05,887 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:07,888 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:09,889 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:11,183 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:11,183 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:11,891 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:13,892 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:15,892 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:17,893 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:26,338 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:26,338 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:35,025 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:02:41,474 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:41,475 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:55,909 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:56,613 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:56,614 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:57,910 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:59,911 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:01,912 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:03,913 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:05,097 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:03:05,914 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:07,915 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:09,916 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:11,780 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:11,780 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:11,917 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:13,917 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:15,918 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:17,919 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:19,920 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:21,923 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:23,922 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:25,923 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:26,924 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:26,924 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:33,927 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:35,167 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:03:35,928 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:37,928 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:39,929 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:42,076 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 23:03:42,076 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:57,218 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:57,218 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:05,245 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:04:09,941 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:12,375 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:12,376 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:20,946 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:22,947 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:24,948 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:26,949 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:27,518 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:27,518 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:28,950 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:30,951 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:32,954 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:34,955 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:35,322 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:04:36,956 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:38,957 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:40,958 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:42,666 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:42,666 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:42,959 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:44,960 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:46,961 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:48,963 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:50,964 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:52,965 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:54,966 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:56,967 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:57,814 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:57,814 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:58,968 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:00,969 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:02,970 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:04,971 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:05,420 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:05:06,972 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:08,973 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:10,974 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:12,976 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:13,306 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:13,306 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:14,977 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:16,978 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:18,979 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:20,980 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:22,981 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:24,982 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:26,983 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:28,447 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:28,447 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:28,984 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:30,985 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:32,986 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:34,987 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:35,527 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:05:36,988 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:38,989 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:40,990 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:43,648 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:43,794 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:58,934 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:58,935 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:05,604 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:14,074 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:14,075 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:23,008 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:29,357 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:29,358 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:30,012 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:35,682 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:36,014 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:42,017 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:44,577 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:44,578 DEBUG SenderThread:3550240 
[sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:48,019 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:56,023 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:57,848 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:57,848 DEBUG SenderThread:3550240 [sender.py:send():234] send: telemetry +2022-07-29 23:06:57,848 DEBUG SenderThread:3550240 [sender.py:send():234] send: exit +2022-07-29 23:06:57,849 INFO SenderThread:3550240 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 23:06:57,850 INFO SenderThread:3550240 [sender.py:send_exit():368] handling runtime: 714 +2022-07-29 23:06:57,851 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:06:57,851 INFO SenderThread:3550240 [sender.py:send_exit():374] send defer +2022-07-29 23:06:57,851 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:57,852 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,852 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 23:06:57,853 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,853 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 23:06:57,853 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 1 +2022-07-29 23:06:57,853 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,853 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 1 
+2022-07-29 23:06:57,937 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,937 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 23:06:57,937 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 2 +2022-07-29 23:06:57,938 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:57,938 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,938 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 23:06:57,938 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 3 +2022-07-29 23:06:57,939 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,939 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 23:06:57,939 DEBUG SenderThread:3550240 [sender.py:send():234] send: summary +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:06:57,940 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 4 +2022-07-29 23:06:57,940 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,940 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 23:06:57,940 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer 
+2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 23:06:57,954 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:58,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 23:06:58,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:58,119 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 5 +2022-07-29 23:06:58,120 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:58,120 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:58,120 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 23:06:58,120 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:58,120 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 23:06:58,120 INFO SenderThread:3550240 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 23:06:58,221 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml +2022-07-29 23:06:59,025 INFO SenderThread:3550240 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files +2022-07-29 23:06:59,025 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml config.yaml 
+2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt requirements.txt +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log output.log +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json wandb-summary.json +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json wandb-metadata.json +2022-07-29 23:06:59,032 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 23:06:59,032 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 6 +2022-07-29 23:06:59,032 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,033 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:59,033 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 23:06:59,034 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:59,034 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 23:06:59,034 INFO SenderThread:3550240 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:06:59,134 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,134 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 
23:06:59,236 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,236 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,337 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,338 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,439 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,440 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,522 INFO Thread-13 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml +2022-07-29 23:06:59,541 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,541 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,582 INFO Thread-16 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 23:06:59,603 INFO Thread-14 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt +2022-07-29 23:06:59,643 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,643 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,733 INFO Thread-15 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:59,744 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,745 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit 
+2022-07-29 23:06:59,846 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,846 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,934 INFO Thread-7 :3550240 [sender.py:transition_state():387] send defer: 7 +2022-07-29 23:06:59,934 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:59,934 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 23:06:59,935 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:59,935 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 23:06:59,948 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:07:00,405 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 8 +2022-07-29 23:07:00,405 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:07:00,406 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:07:00,406 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 23:07:00,406 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:07:00,406 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 23:07:00,406 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 9 +2022-07-29 23:07:00,407 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:07:00,407 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send():234] send: final +2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send():234] send: footer 
+2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:07:00,407 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 23:07:00,507 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:07:00,507 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:07:00,507 INFO SenderThread:3550240 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:07:00,771 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 23:07:00,772 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 23:07:00,772 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 23:07:00,772 INFO HandlerThread:3550240 [handler.py:finish():731] shutting down handler +2022-07-29 23:07:01,407 INFO WriterThread:3550240 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb +2022-07-29 23:07:01,770 INFO SenderThread:3550240 [sender.py:finish():1070] shutting down sender +2022-07-29 23:07:01,771 INFO SenderThread:3550240 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:07:01,771 INFO SenderThread:3550240 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:07:01,774 INFO MainThread:3550240 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_225502-398l7dkj/logs/debug.log b/wandb/run-20220729_225502-398l7dkj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0cb91e910d9f15409f7f5790087dc5b25ac4ae5a --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 22:55:02,545 INFO MainThread:3548990 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 
'NbAiLab'} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/logs/debug.log +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():404] calling init triggers +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():460] starting backend +2022-07-29 22:55:02,546 INFO MainThread:3548990 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 22:55:02,591 INFO MainThread:3548990 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 22:55:02,633 INFO MainThread:3548990 [backend.py:ensure_launched():221] started backend process with pid: 3550240 +2022-07-29 22:55:02,635 INFO MainThread:3548990 [wandb_init.py:init():469] backend started and connected +2022-07-29 22:55:02,648 INFO MainThread:3548990 [wandb_init.py:init():533] updated telemetry +2022-07-29 22:55:02,755 INFO MainThread:3548990 [wandb_init.py:init():563] communicating current version +2022-07-29 22:55:03,483 INFO MainThread:3548990 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 22:55:03,484 INFO MainThread:3548990 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 22:55:03,661 INFO MainThread:3548990 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 22:55:05,995 INFO MainThread:3548990 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 22:55:05,996 INFO MainThread:3548990 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 22:55:05,996 INFO MainThread:3548990 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 22:55:05,998 INFO MainThread:3548990 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 22:55:05,998 INFO MainThread:3548990 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 23:06:55,326 INFO MainThread:3548990 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 23:06:55,330 INFO MainThread:3548990 [wandb_run.py:_restore():1752] restore +2022-07-29 23:06:57,852 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:06:58,120 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:06:59,033 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 332656 +} + +2022-07-29 23:06:59,135 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 332656 +} + +2022-07-29 23:06:59,237 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,339 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,440 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,542 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,644 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,745 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,847 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:07:00,406 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:07:00,771 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} +local_info { +} + +2022-07-29 23:07:02,394 INFO MainThread:3548990 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb b/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0cf6b676aea3057f2db029cb42f492cf13e4e958 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dbcd1f0f1caf2601a6ec76b266f1e0370a12efed9f5a3044abe1af8f770a741 +size 379814 diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml b/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4efdc4427823db79e345df347c79507f79524662 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659136287 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/output.log b/wandb/run-20220729_231127-1dfdwyjl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1893b32522e6791e9a9fab73f422ae04f0e5c40d --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/output.log @@ -0,0 +1,1556 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_23-11-24_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.19it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 444.16it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at 
/home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_hid', 'kernel'), ('project_q', 'bias'), ('project_q', 'kernel'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9193.91ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8958.21ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8807.85ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8784.75ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8832.04ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8739.93ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8775.53ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8387.11ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8735.11ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8755.82ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9024.17ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8645.13ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8553.59ex/s] +removing punctuation from train split #13: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8613.35ex/s] +removing punctuation from train split #7: 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8904/9523 [00:01<00:00, 7980.09ex/s] +removing punctuation from train split #8: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8972/9523 [00:01<00:00, 8456.61ex/s] +removing punctuation from train split #9: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8871/9523 [00:01<00:00, 8764.44ex/s] +removing punctuation from train split #10: 55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5243/9523 [00:00<00:00, 8817.16ex/s] +removing punctuation from train split #11: 55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5209/9523 [00:00<00:00, 8834.64ex/s] +removing punctuation from train split #10: 64%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6125/9523 [00:00<00:00, 8529.96ex/s] +removing punctuation from train split #10: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9081/9523 [00:01<00:00, 9422.28ex/s] +removing punctuation from train split #11: 
83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7871/9523 [00:00<00:00, 8692.94ex/s] +removing punctuation from train split #11: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8753/9523 [00:01<00:00, 8729.01ex/s] +removing punctuation from train split #12: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8650/9522 [00:01<00:00, 8652.34ex/s] +removing punctuation from train split #12: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 9520/9522 [00:01<00:00, 8664.29ex/s] +removing punctuation from train split #13: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8703/9522 [00:01<00:00, 8638.43ex/s] +removing punctuation from train split #15: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6869/9522 [00:00<00:00, 8755.53ex/s] +removing punctuation from train split #14: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8760/9522 [00:01<00:00, 8884.44ex/s] +removing punctuation from train split #16: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6936/9522 
[00:00<00:00, 8748.75ex/s] +removing punctuation from train split #16: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7829/9522 [00:00<00:00, 8803.55ex/s] +removing punctuation from train split #18: 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5905/9522 [00:00<00:00, 8674.73ex/s] +removing punctuation from train split #17: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7699/9522 [00:00<00:00, 8739.44ex/s] +removing punctuation from train split #19: 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5908/9522 [00:00<00:00, 8170.09ex/s] +removing punctuation from train split #20: 63%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6038/9522 [00:00<00:00, 8747.93ex/s] +removing punctuation from train split #21: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 5019/9522 [00:00<00:00, 8166.74ex/s] +removing punctuation from train split #24: 26%|███████████████████████████████████████████████████▋ | 2512/9522 [00:00<00:00, 8445.21ex/s] +removing punctuation from train split #22: 54%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5188/9522 [00:00<00:00, 8759.94ex/s] +removing punctuation from train split #24: 36%|█████████████████████████████████████████████████████████████████████▉ | 3396/9522 [00:00<00:00, 8597.94ex/s] +removing punctuation from train split #26: 17%|█████████████████████████████████▊ | 1645/9522 [00:00<00:00, 8275.63ex/s] +removing punctuation from train 
split #27: 7%|█████████████▉ | 676/9522 [00:00<00:01, 6755.98ex/s] +removing punctuation from train split #26: 26%|███████████████████████████████████████████████████▊ | 2520/9522 [00:00<00:00, 8489.28ex/s] +removing punctuation from train split #28: 8%|████████████████ | 776/9522 [00:00<00:01, 7757.84ex/s] +removing punctuation from train split #29: 8%|████████████████▍ | 794/9522 [00:00<00:01, 7931.14ex/s] +removing punctuation from train split #23: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8848/9522 [00:01<00:00, 9067.40ex/s] +removing punctuation from train split #24: 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8010/9522 [00:00<00:00, 9242.91ex/s] +removing punctuation from train split #25: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7044/9522 [00:00<00:00, 9045.07ex/s] +removing punctuation from train split #26: 64%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6123/9522 [00:00<00:00, 8965.65ex/s] +removing punctuation from train split #24: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8935/9522 [00:01<00:00, 8746.49ex/s] +removing punctuation from train split #25: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8900/9522 [00:01<00:00, 9158.21ex/s] +removing punctuation from train split #26: 
84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7993/9522 [00:00<00:00, 8934.43ex/s] +removing punctuation from train split #26: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8967/9522 [00:01<00:00, 9175.35ex/s] +removing punctuation from train split #28: 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 6182/9522 [00:00<00:00, 9114.81ex/s] +removing punctuation from train split #27: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7897/9522 [00:00<00:00, 8462.44ex/s] +removing punctuation from train split #27: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8849/9522 [00:01<00:00, 8770.22ex/s] +removing punctuation from train split #28: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8988/9522 [00:01<00:00, 8946.88ex/s] +removing punctuation from train split #29: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8068/9522 [00:00<00:00, 8464.40ex/s] +removing punctuation from train split #29: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9032/9522 
[00:01<00:00, 8804.29ex/s] +removing punctuation from train split #31: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8242/9522 [00:00<00:00, 8701.18ex/s] +removing punctuation from train split #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9036/9522 [00:01<00:00, 8927.84ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow9124/9522 [00:01<00:00, 8249.38ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|█ | 47/9497 [00:01<04:09, 37.86ex/s] +preprocess dataset #1: 0%|▌ | 26/9497 [00:01<04:53, 32.29ex/s] +preprocess dataset #2: 1%|█▋ | 74/9497 [00:02<03:01, 51.86ex/s] +preprocess dataset #3: 1%|█▏ | 50/9497 [00:01<03:12, 49.12ex/s] +preprocess dataset #4: 0%|▋ | 30/9497 [00:01<03:44, 42.23ex/s] +preprocess dataset #5: 1%|██▎ | 102/9497 [00:02<02:59, 52.44ex/s] +preprocess dataset #6: 1%|█▏ | 49/9497 [00:01<03:16, 48.06ex/s] +preprocess dataset #7: 0%|▎ | 12/9497 [00:00<08:35, 18.40ex/s] +preprocess dataset #8: 1%|█▌ | 65/9497 [00:01<03:11, 49.33ex/s] +preprocess dataset #9: 0%|▏ | 6/9497 [00:00<17:10, 9.21ex/s] +preprocess dataset #10: 1%|█▉ | 85/9497 [00:02<03:30, 44.78ex/s] +preprocess dataset #11: 0%|▉ | 38/9496 [00:01<03:52, 40.64ex/s] +preprocess dataset #12: 1%|█▋ | 73/9496 
[00:02<03:25, 45.85ex/s] +preprocess dataset #13: 0%|▉ | 39/9496 [00:01<03:54, 40.29ex/s] +preprocess dataset #14: 1%|█▊ | 80/9496 [00:02<04:16, 36.75ex/s] +preprocess dataset #15: 0%|▉ | 39/9496 [00:01<04:15, 36.95ex/s] +preprocess dataset #16: 1%|██ | 88/9496 [00:02<03:49, 41.04ex/s] +preprocess dataset #17: 1%|█▏ | 54/9496 [00:02<03:46, 41.74ex/s] +preprocess dataset #18: 1%|█▍ | 63/9496 [00:02<03:46, 41.61ex/s] +preprocess dataset #19: 0%|▌ | 27/9496 [00:01<04:45, 33.17ex/s] +preprocess dataset #20: 1%|██▏ | 96/9496 [00:03<03:59, 39.22ex/s] +preprocess dataset #21: 0%|█ | 44/9496 [00:01<04:26, 35.43ex/s] +preprocess dataset #22: 1%|█▊ | 76/9496 [00:03<04:27, 35.16ex/s] +preprocess dataset #23: 0%|▍ | 20/9496 [00:01<06:38, 23.79ex/s] +preprocess dataset #24: 1%|█▏ | 49/9496 [00:02<05:52, 26.77ex/s] +preprocess dataset #25: 0%|▍ | 17/9496 [00:01<07:34, 20.86ex/s] +preprocess dataset #26: 0%|▋ | 32/9496 [00:01<05:28, 28.82ex/s] +preprocess dataset #27: 1%|█▍ | 64/9496 [00:03<05:27, 28.79ex/s] +preprocess dataset #28: 0%|▊ | 33/9496 [00:01<06:14, 25.27ex/s] +preprocess dataset #29: 0%|█ | 47/9496 [00:02<05:43, 27.48ex/s] +preprocess dataset #30: 0%|▏ | 8/9496 [00:01<14:28, 10.92ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #30: 44%|███████████████████████████████████████████████████████████████████████████████████████████████▎ | 4172/9496 [02:25<02:41, 32.92ex/s] + + + + +preprocess dataset #30: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4496/9496 [02:35<02:21, 35.37ex/s] + + + + + + + + + + + + + + + + +preprocess dataset #30: 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5511/9496 [03:10<02:19, 28.60ex/s] + + + + + + + + + + + + + + + + + + + + +preprocess dataset #30: 
71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6764/9496 [03:52<01:32, 29.51ex/s] + + + + + + + + + + + + +preprocess dataset #30: 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7549/9496 [04:18<01:02, 31.36ex/s] + + +preprocess dataset #30: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7731/9496 [04:24<00:57, 30.66ex/s] + + + + + + +preprocess dataset #30: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8164/9496 [04:38<00:37, 35.53ex/s] +preprocess dataset #28: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8419/9496 [04:42<00:33, 31.86ex/s] +preprocess dataset #29: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8294/9496 [04:41<00:29, 40.13ex/s] +preprocess dataset #30: 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8230/9496 [04:40<00:35, 35.23ex/s] +preprocess dataset #30: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8321/9496 [04:42<00:31, 
37.50ex/s] +preprocess dataset #30: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8400/9496 [04:44<00:25, 42.61ex/s] +preprocess dataset #29: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8520/9496 [04:47<00:27, 35.34ex/s] +preprocess dataset #30: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8479/9496 [04:46<00:24, 41.29ex/s] +preprocess dataset #30: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8524/9496 [04:47<00:24, 39.90ex/s] +preprocess dataset #29: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8677/9496 [04:52<00:22, 37.10ex/s] +preprocess dataset #30: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8649/9496 [04:50<00:17, 47.74ex/s] + +preprocess dataset #29: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8846/9496 [04:56<00:13, 49.05ex/s] +preprocess dataset #30: 
93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8823/9496 [04:54<00:14, 45.30ex/s] +preprocess dataset #29: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8924/9496 [04:57<00:12, 46.54ex/s] +preprocess dataset #30: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8900/9496 [04:56<00:12, 46.90ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8990/9496 [04:58<00:09, 51.57ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8996/9496 [04:58<00:10, 48.61ex/s] +preprocess dataset #30: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9061/9496 [05:00<00:08, 52.89ex/s] +preprocess dataset #27: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9409/9496 [05:06<00:01, 61.40ex/s] +preprocess dataset #28: 
98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9299/9496 [05:05<00:04, 41.62ex/s] +preprocess dataset #29: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9201/9496 [05:04<00:06, 43.92ex/s] +preprocess dataset #30: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9195/9496 [05:02<00:04, 61.11ex/s] +preprocess dataset #28: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9403/9496 [05:07<00:01, 73.94ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9418/9496 [05:07<00:01, 61.93ex/s] +preprocess dataset #29: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9309/9496 [05:06<00:02, 65.64ex/s] +preprocess dataset #30: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9320/9496 [05:04<00:02, 65.86ex/s] +preprocess dataset #30: 
99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9416/9496 [05:06<00:01, 56.68ex/s] +preprocess dataset #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 9459/9496 [05:06<00:00, 75.92ex/s] +preprocess dataset #4: 0%| | 0/1267 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-29 23:23:21.061068: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-29 23:23:21.061132: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 128 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 1024 +INFO:__main__: Total optimization steps = 11800 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 51.78G of 15.48G hbm. Exceeded hbm capacity by 36.29G. +Total hbm usage >= 52.29G: + reserved 530.00M + program 51.78G + arguments 0B +Output size 0B; shares 0B with arguments. +Program hbm requirement 51.78G: + global 132.0K + scoped 72.08M + HLO temp 51.71G (99.1% utilization: Unpadded (49.63G) Padded (50.08G), 3.1% fragmentation (1.63G)) + Largest program allocations in hbm: + 1. Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.29627 = fusion(copy.5391.remat2, bitcast.10361), kind=kOutput, calls=fused_computation.22171 + Allocation type: HLO temp + ========================== + 2. 
Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.190.remat7 = fusion(bitcast.10358, bitcast.10356, copy.5386), kind=kOutput, calls=fused_computation.186.clone.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.13721.remat_compressed = copy(fusion.27401) + Allocation type: HLO temp + ========================== + 4. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/22/22/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22380 = fusion(fusion.7480.remat, get-tuple-element.13471, get-tuple-element.13469, get-tuple-element.13473, ...(+5)), kind=kOutput, calls=fused_computation.20608 + Allocation type: HLO temp + ========================== + 5. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/44/44/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22336 = fusion(fusion.7414.remat, get-tuple-element.13855, get-tuple-element.13853, get-tuple-element.13857, ...(+5)), kind=kOutput, calls=fused_computation.20564 + Allocation type: HLO temp + ========================== + 6. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22346 = fusion(fusion.7429.remat, get-tuple-element.13759, get-tuple-element.13757, get-tuple-element.13761, ...(+5)), kind=kOutput, calls=fused_computation.20574 + Allocation type: HLO temp + ========================== + 7. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: copy.8318 = copy(slice.408) + Allocation type: HLO temp + ========================== + 8. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/projection/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22426 = fusion(copy.8318, get-tuple-element.13989, fusion.10472, get-tuple-element.10358, ...(+9)), kind=kOutput, calls=fused_computation.20654 + Allocation type: HLO temp + ========================== + 9. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/46/46/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22332 = fusion(fusion.7408.remat, get-tuple-element.13887, get-tuple-element.13885, get-tuple-element.13889, ...(+5)), kind=kOutput, calls=fused_computation.20560 + Allocation type: HLO temp + ========================== + 10. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22424 = fusion(get-tuple-element.11275, get-tuple-element.13231, get-tuple-element.13229, get-tuple-element.13233, ...(+5)), kind=kOutput, calls=fused_computation.20652 + Allocation type: HLO temp + ========================== + 11. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22422 = fusion(fusion.7543.remat, get-tuple-element.13247, get-tuple-element.13245, get-tuple-element.13249, ...(+5)), kind=kOutput, calls=fused_computation.20650 + Allocation type: HLO temp + ========================== + 12. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22420 = fusion(fusion.7540.remat, get-tuple-element.13423, get-tuple-element.13421, get-tuple-element.13425, ...(+5)), kind=kOutput, calls=fused_computation.20648 + Allocation type: HLO temp + ========================== + 13. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22418 = fusion(fusion.7537.remat, get-tuple-element.13599, get-tuple-element.13597, get-tuple-element.13601, ...(+5)), kind=kOutput, calls=fused_computation.20646 + Allocation type: HLO temp + ========================== + 14. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22416 = fusion(fusion.7534.remat, get-tuple-element.13775, get-tuple-element.13773, get-tuple-element.13777, ...(+5)), kind=kOutput, calls=fused_computation.20644 + Allocation type: HLO temp + ========================== + 15. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22414 = fusion(fusion.7531.remat, get-tuple-element.13919, get-tuple-element.13917, get-tuple-element.13921, ...(+5)), kind=kOutput, calls=fused_computation.20642 + Allocation type: HLO temp + ========================== + 16. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22412 = fusion(fusion.7528.remat, get-tuple-element.13935, get-tuple-element.13933, get-tuple-element.13937, ...(+5)), kind=kOutput, calls=fused_computation.20640 + Allocation type: HLO temp + ========================== + 17. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22410 = fusion(fusion.7525.remat, get-tuple-element.13951, get-tuple-element.13949, get-tuple-element.13953, ...(+5)), kind=kOutput, calls=fused_computation.20638 + Allocation type: HLO temp + ========================== + 18. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22408 = fusion(fusion.7522.remat, get-tuple-element.13967, get-tuple-element.13965, get-tuple-element.13969, ...(+5)), kind=kOutput, calls=fused_computation.20636 + Allocation type: HLO temp + ========================== + 19. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22406 = fusion(fusion.7519.remat, get-tuple-element.13983, get-tuple-element.13981, get-tuple-element.13985, ...(+5)), kind=kOutput, calls=fused_computation.20634 + Allocation type: HLO temp + ========================== + 20. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22404 = fusion(fusion.7516.remat, get-tuple-element.13263, get-tuple-element.13261, get-tuple-element.13265, ...(+5)), kind=kOutput, calls=fused_computation.20632 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 51.78G of 15.48G hbm. Exceeded hbm capacity by 36.29G. +Total hbm usage >= 52.29G: + reserved 530.00M + program 51.78G + arguments 0B +Output size 0B; shares 0B with arguments. +Program hbm requirement 51.78G: + global 132.0K + scoped 72.08M + HLO temp 51.71G (99.1% utilization: Unpadded (49.63G) Padded (50.08G), 3.1% fragmentation (1.63G)) + Largest program allocations in hbm: + 1. 
Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.29627 = fusion(copy.5391.remat2, bitcast.10361), kind=kOutput, calls=fused_computation.22171 + Allocation type: HLO temp + ========================== + 2. Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.190.remat7 = fusion(bitcast.10358, bitcast.10356, copy.5386), kind=kOutput, calls=fused_computation.186.clone.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.13721.remat_compressed = copy(fusion.27401) + Allocation type: HLO temp + ========================== + 4. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/22/22/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22380 = fusion(fusion.7480.remat, get-tuple-element.13471, get-tuple-element.13469, get-tuple-element.13473, ...(+5)), kind=kOutput, calls=fused_computation.20608 + Allocation type: HLO temp + ========================== + 5. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/44/44/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22336 = fusion(fusion.7414.remat, get-tuple-element.13855, get-tuple-element.13853, get-tuple-element.13857, ...(+5)), kind=kOutput, calls=fused_computation.20564 + Allocation type: HLO temp + ========================== + 6. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22346 = fusion(fusion.7429.remat, get-tuple-element.13759, get-tuple-element.13757, get-tuple-element.13761, ...(+5)), kind=kOutput, calls=fused_computation.20574 + Allocation type: HLO temp + ========================== + 7. 
Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: copy.8318 = copy(slice.408) + Allocation type: HLO temp + ========================== + 8. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/projection/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22426 = fusion(copy.8318, get-tuple-element.13989, fusion.10472, get-tuple-element.10358, ...(+9)), kind=kOutput, calls=fused_computation.20654 + Allocation type: HLO temp + ========================== + 9. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/46/46/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22332 = fusion(fusion.7408.remat, get-tuple-element.13887, get-tuple-element.13885, get-tuple-element.13889, ...(+5)), kind=kOutput, calls=fused_computation.20560 + Allocation type: HLO temp + ========================== + 10. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22424 = fusion(get-tuple-element.11275, get-tuple-element.13231, get-tuple-element.13229, get-tuple-element.13233, ...(+5)), kind=kOutput, calls=fused_computation.20652 + Allocation type: HLO temp + ========================== + 11. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22422 = fusion(fusion.7543.remat, get-tuple-element.13247, get-tuple-element.13245, get-tuple-element.13249, ...(+5)), kind=kOutput, calls=fused_computation.20650 + Allocation type: HLO temp + ========================== + 12. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22420 = fusion(fusion.7540.remat, get-tuple-element.13423, get-tuple-element.13421, get-tuple-element.13425, ...(+5)), kind=kOutput, calls=fused_computation.20648 + Allocation type: HLO temp + ========================== + 13. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22418 = fusion(fusion.7537.remat, get-tuple-element.13599, get-tuple-element.13597, get-tuple-element.13601, ...(+5)), kind=kOutput, calls=fused_computation.20646 + Allocation type: HLO temp + ========================== + 14. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22416 = fusion(fusion.7534.remat, get-tuple-element.13775, get-tuple-element.13773, get-tuple-element.13777, ...(+5)), kind=kOutput, calls=fused_computation.20644 + Allocation type: HLO temp + ========================== + 15. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22414 = fusion(fusion.7531.remat, get-tuple-element.13919, get-tuple-element.13917, get-tuple-element.13921, ...(+5)), kind=kOutput, calls=fused_computation.20642 + Allocation type: HLO temp + ========================== + 16. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22412 = fusion(fusion.7528.remat, get-tuple-element.13935, get-tuple-element.13933, get-tuple-element.13937, ...(+5)), kind=kOutput, calls=fused_computation.20640 + Allocation type: HLO temp + ========================== + 17. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22410 = fusion(fusion.7525.remat, get-tuple-element.13951, get-tuple-element.13949, get-tuple-element.13953, ...(+5)), kind=kOutput, calls=fused_computation.20638 + Allocation type: HLO temp + ========================== + 18. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22408 = fusion(fusion.7522.remat, get-tuple-element.13967, get-tuple-element.13965, get-tuple-element.13969, ...(+5)), kind=kOutput, calls=fused_computation.20636 + Allocation type: HLO temp + ========================== + 19. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22406 = fusion(fusion.7519.remat, get-tuple-element.13983, get-tuple-element.13981, get-tuple-element.13985, ...(+5)), kind=kOutput, calls=fused_computation.20634 + Allocation type: HLO temp + ========================== + 20. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22404 = fusion(fusion.7516.remat, get-tuple-element.13263, get-tuple-element.13261, get-tuple-element.13265, ...(+5)), kind=kOutput, calls=fused_computation.20632 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt b/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 
+colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 
+soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..49ba9fbd8b3a20b020fba53a66523f5898978f6a --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T23:11:31.214337", + "startedAt": "2022-07-29T23:11:27.767847", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + 
"--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0e9bee84d7792f9ed29dd896dbf35c24f9be20ae --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1042}} \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log b/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..00862f1e1cbace2cb76a5e62ee06c815e759083b --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log @@ -0,0 +1,579 @@ +2022-07-29 23:11:28,666 INFO MainThread:2561171 [internal.py:wandb_internal():87] W&B internal server running at pid: 2561171, started at: 2022-07-29 23:11:28.666358 +2022-07-29 23:11:28,668 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: check_version +2022-07-29 23:11:28,669 INFO WriterThread:2561171 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb +2022-07-29 23:11:28,669 DEBUG SenderThread:2561171 [sender.py:send():234] send: header +2022-07-29 23:11:28,670 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: check_version +2022-07-29 23:11:28,721 DEBUG SenderThread:2561171 [sender.py:send():234] send: run +2022-07-29 23:11:28,892 INFO SenderThread:2561171 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files +2022-07-29 23:11:28,892 INFO SenderThread:2561171 [sender.py:_start_run_threads():804] run started: 1dfdwyjl with start time 1659136287 +2022-07-29 23:11:28,893 DEBUG SenderThread:2561171 [sender.py:send():234] send: summary +2022-07-29 23:11:28,893 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:11:28,895 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 23:11:29,894 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:__init__():40] meta init +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:__init__():54] meta init done +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:probe():214] probe +2022-07-29 23:11:31,215 DEBUG HandlerThread:2561171 [meta.py:_setup_git():204] setup git +2022-07-29 23:11:31,252 DEBUG HandlerThread:2561171 [meta.py:_setup_git():211] setup git done +2022-07-29 23:11:31,253 DEBUG HandlerThread:2561171 [meta.py:_save_code():92] save code +2022-07-29 23:11:31,266 DEBUG HandlerThread:2561171 [meta.py:_save_code():113] save code done +2022-07-29 23:11:31,266 DEBUG HandlerThread:2561171 
[meta.py:_save_patches():130] save patches +2022-07-29 23:11:31,326 DEBUG HandlerThread:2561171 [meta.py:_save_patches():172] save patches done +2022-07-29 23:11:31,326 DEBUG HandlerThread:2561171 [meta.py:_save_pip():58] save pip +2022-07-29 23:11:31,327 DEBUG HandlerThread:2561171 [meta.py:_save_pip():72] save pip done +2022-07-29 23:11:31,327 DEBUG HandlerThread:2561171 [meta.py:probe():252] probe done +2022-07-29 23:11:31,330 DEBUG SenderThread:2561171 [sender.py:send():234] send: files +2022-07-29 23:11:31,330 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 23:11:31,331 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 23:11:31,338 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:11:31,338 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:11:31,837 INFO Thread-11 :2561171 [upload_job.py:push():137] Uploaded file /tmp/tmprdbwdzkmwandb/1i44h917-wandb-metadata.json +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code +2022-07-29 23:11:32,052 INFO Thread-12 :2561171 [upload_job.py:push():137] Uploaded file /tmp/tmprdbwdzkmwandb/2vstmlwc-code/run_flax_speech_recognition_ctc.py +2022-07-29 23:11:33,896 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:35,897 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:37,897 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:39,898 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:45,901 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:46,519 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:11:46,519 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:11:47,902 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:59,299 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:11:59,907 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:01,743 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-29 23:12:01,743 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:01,908 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:12,914 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:14,915 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:16,897 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:16,897 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:28,922 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:29,375 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:12:30,923 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:32,034 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:32,034 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:47,188 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:47,189 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:59,451 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:13:02,378 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:02,378 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:13,945 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:15,946 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:17,785 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:17,785 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:17,947 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:19,948 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:21,949 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:23,950 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:25,951 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:27,952 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:29,520 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 
23:13:29,953 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:31,954 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:32,981 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:32,981 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:33,955 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:35,956 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:37,957 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:39,958 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:41,959 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:43,960 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:45,961 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:47,962 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:48,142 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:48,142 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:49,963 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:51,964 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:53,965 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:55,966 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:57,967 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:59,601 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:13:59,968 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:01,969 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:03,286 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:03,287 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:03,970 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:05,971 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:07,972 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:09,973 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:11,974 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:14,056 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:16,057 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:18,134 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:18,441 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:18,441 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:20,135 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:22,136 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:24,137 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:26,138 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:28,139 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:29,967 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:14:30,140 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:32,141 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:33,728 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:33,728 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:34,141 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:36,142 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:38,144 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:40,145 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:42,145 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:44,146 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:46,147 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:48,148 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:48,887 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:48,888 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:50,149 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:52,150 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:54,151 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:56,152 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:58,153 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:00,038 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:15:00,154 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:02,195 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:04,029 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:04,030 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:04,158 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:06,159 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:08,161 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:10,161 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:12,163 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:14,164 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:16,165 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:18,166 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:19,192 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:19,192 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:20,167 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:22,169 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:24,170 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:26,171 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:28,172 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:30,122 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:15:30,173 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:32,174 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:34,176 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:34,330 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:34,330 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:36,177 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:38,178 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:40,179 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:42,180 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:44,184 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:47,185 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:49,186 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:49,502 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:49,502 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:51,188 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:53,190 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:55,192 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:57,192 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:59,194 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:00,208 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:16:01,196 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:03,197 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:04,655 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:04,655 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:05,198 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:07,199 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:09,200 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:11,201 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:13,202 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:15,203 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:17,204 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:19,205 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:19,858 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:19,859 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:21,206 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:23,207 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:25,207 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:27,208 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:29,210 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:30,303 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:16:31,211 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:33,212 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:35,054 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:35,054 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:35,213 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:37,214 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:39,215 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:41,216 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:43,217 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:45,218 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:47,219 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:49,220 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:50,201 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:50,201 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:51,221 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:53,222 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:55,223 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:57,224 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:59,225 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:00,386 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:17:01,226 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:03,227 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:05,228 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:05,345 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:05,345 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:07,229 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:09,231 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:11,232 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:13,233 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:15,234 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:17,235 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:19,236 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:20,489 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:20,490 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:21,237 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:23,238 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:25,239 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:27,240 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:29,241 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:30,478 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:17:31,242 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:33,243 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:35,245 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:35,630 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:35,631 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:37,246 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:39,247 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:41,248 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:43,249 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:45,250 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:47,251 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:49,254 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:50,775 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:50,776 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:51,255 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:53,256 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:55,257 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:57,258 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:17:59,259 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:00,571 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:18:01,260 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:03,261 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:05,261 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:05,946 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:05,946 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:07,263 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:09,264 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:11,265 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:13,266 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:15,267 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:18:17,268 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:19,269 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:21,108 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:21,108 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:21,272 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:23,273 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:25,274 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:27,275 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:29,275 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:30,646 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:18:31,276 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:33,278 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:18:35,279 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:36,273 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:36,273 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:37,281 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:39,282 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:41,283 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:43,284 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:45,285 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:47,285 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:49,286 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:51,439 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:51,440 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:00,714 DEBUG SenderThread:2561171 
[sender.py:send():234] send: stats +2022-07-29 23:19:06,576 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:06,576 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:20,300 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:21,794 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:21,795 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:22,300 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:24,301 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:26,302 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:28,303 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:30,304 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:30,785 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:19:32,305 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:34,306 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:36,307 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:36,936 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:36,937 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:38,308 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:40,309 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:42,310 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:44,311 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:46,312 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:48,313 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:50,314 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:52,084 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:52,084 DEBUG SenderThread:2561171 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 23:19:54,370 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:56,370 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:58,371 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:00,372 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:00,889 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:20:02,373 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:07,220 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:07,221 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:22,356 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:22,356 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:30,956 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:20:37,669 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:37,670 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:40,389 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log 
+2022-07-29 23:20:42,390 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:44,391 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:46,392 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:48,392 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:50,393 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:52,394 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:52,864 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:52,865 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:54,395 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:56,396 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:58,397 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:00,398 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:01,034 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:21:02,399 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:04,399 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:06,400 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:08,007 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:08,007 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:08,402 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:10,404 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:12,405 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:14,406 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:16,407 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:18,408 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:20,409 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:22,410 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:23,145 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:23,146 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:24,411 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:26,412 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:28,413 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:30,414 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:31,116 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:21:32,414 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:34,415 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:36,417 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:38,285 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:38,286 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:38,418 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:40,419 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:42,420 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:44,421 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:46,422 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:48,423 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:50,424 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:52,425 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:53,463 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:53,464 DEBUG SenderThread:2561171 
[sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:54,426 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:56,427 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:58,430 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:00,431 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:01,191 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:22:08,606 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:08,606 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:23,745 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:23,745 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:31,267 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:22:38,973 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:38,974 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:43,448 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:45,449 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:51,452 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:54,350 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:54,350 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:59,455 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:01,342 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:23:06,458 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:09,699 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:09,700 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:12,461 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:20,465 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:22,466 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:24,891 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:24,891 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:26,467 INFO Thread-8 
:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:30,469 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:31,417 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:23:33,470 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:40,452 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:40,453 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:41,474 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:56,024 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:56,025 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:01,498 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:24:11,418 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:11,419 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:26,889 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:26,890 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:31,572 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:24:42,392 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
23:24:42,392 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:58,028 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:58,028 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:01,645 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:25:13,350 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:13,350 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:20,516 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:25:28,503 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:28,504 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:31,721 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:25:43,990 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:43,990 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:53,530 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:25:59,162 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:59,162 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:01,533 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:26:01,794 DEBUG 
SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:26:03,534 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:26:14,326 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:14,326 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:29,473 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:29,473 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:31,872 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:26:44,613 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:44,613 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:59,751 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:59,751 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:01,950 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:27:14,936 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:14,937 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:30,071 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:30,071 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:32,027 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:27:45,210 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:45,210 DEBUG 
SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:00,347 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:00,347 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:02,106 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:15,480 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:15,480 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:30,782 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:30,782 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:32,182 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:45,925 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:45,925 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:50,603 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:51,678 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:51,679 DEBUG SenderThread:2561171 [sender.py:send():234] send: telemetry +2022-07-29 23:28:51,679 DEBUG SenderThread:2561171 [sender.py:send():234] send: exit +2022-07-29 23:28:51,679 INFO SenderThread:2561171 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 23:28:51,679 INFO SenderThread:2561171 [sender.py:send_exit():368] handling runtime: 1042 +2022-07-29 23:28:51,680 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:28:51,680 INFO 
SenderThread:2561171 [sender.py:send_exit():374] send defer +2022-07-29 23:28:51,680 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:51,681 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,681 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 23:28:51,681 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,681 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 23:28:51,681 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 1 +2022-07-29 23:28:51,682 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,682 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 23:28:51,730 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,730 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 23:28:51,730 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 2 +2022-07-29 23:28:51,731 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:51,731 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,731 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 23:28:51,731 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,731 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 23:28:51,731 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 3 +2022-07-29 23:28:51,732 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,732 INFO 
HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 23:28:51,732 DEBUG SenderThread:2561171 [sender.py:send():234] send: summary +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:28:51,732 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 4 +2022-07-29 23:28:51,732 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,732 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 23:28:51,733 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,733 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 23:28:51,783 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,053 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 5 +2022-07-29 23:28:52,053 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,054 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:52,054 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 23:28:52,054 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:52,054 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 23:28:52,054 INFO SenderThread:2561171 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 23:28:52,155 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 23:28:52,604 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml config.yaml +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt requirements.txt +2022-07-29 23:28:52,606 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log output.log +2022-07-29 23:28:52,606 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json wandb-summary.json +2022-07-29 23:28:52,610 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json wandb-metadata.json +2022-07-29 23:28:52,610 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 23:28:52,610 INFO 
SenderThread:2561171 [sender.py:transition_state():387] send defer: 6 +2022-07-29 23:28:52,610 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,611 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:52,611 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 23:28:52,611 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:52,612 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 23:28:52,612 INFO SenderThread:2561171 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:28:52,713 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,713 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,816 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,816 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,918 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,918 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,020 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,020 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,079 INFO Thread-14 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt +2022-07-29 23:28:53,111 INFO Thread-13 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml +2022-07-29 23:28:53,122 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,122 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,141 INFO Thread-16 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:28:53,224 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,224 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,325 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,326 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,341 INFO Thread-15 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:53,427 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,427 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,529 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,529 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,541 INFO Thread-7 :2561171 [sender.py:transition_state():387] send defer: 7 +2022-07-29 23:28:53,542 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:53,542 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 23:28:53,542 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:53,542 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 23:28:53,631 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:54,054 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 8 +2022-07-29 23:28:54,055 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:54,055 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:54,055 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:54,056 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 23:28:54,056 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 9 +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send():234] send: final +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send():234] send: footer +2022-07-29 23:28:54,057 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:54,057 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 23:28:54,057 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:54,057 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 23:28:54,157 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:54,157 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:54,157 INFO SenderThread:2561171 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:28:54,423 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 23:28:54,424 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 23:28:54,424 DEBUG 
HandlerThread:2561171 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 23:28:54,424 INFO HandlerThread:2561171 [handler.py:finish():731] shutting down handler +2022-07-29 23:28:55,057 INFO WriterThread:2561171 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [sender.py:finish():1070] shutting down sender +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:28:55,425 INFO MainThread:2561171 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log b/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c50882c2a3cd04c3b19e693b0fe1bd34c7daea65 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log @@ -0,0 +1,157 @@ +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():404] calling init triggers +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():460] starting 
backend +2022-07-29 23:11:27,770 INFO MainThread:2559923 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 23:11:27,816 INFO MainThread:2559923 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 23:11:27,860 INFO MainThread:2559923 [backend.py:ensure_launched():221] started backend process with pid: 2561171 +2022-07-29 23:11:27,862 INFO MainThread:2559923 [wandb_init.py:init():469] backend started and connected +2022-07-29 23:11:27,876 INFO MainThread:2559923 [wandb_init.py:init():533] updated telemetry +2022-07-29 23:11:27,988 INFO MainThread:2559923 [wandb_init.py:init():563] communicating current version +2022-07-29 23:11:28,720 INFO MainThread:2559923 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 23:11:28,720 INFO MainThread:2559923 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 23:11:28,894 INFO MainThread:2559923 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 23:11:31,333 INFO MainThread:2559923 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 23:11:31,334 INFO MainThread:2559923 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 23:11:31,335 INFO MainThread:2559923 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 23:11:31,337 INFO MainThread:2559923 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 23:11:31,337 INFO MainThread:2559923 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 23:28:49,333 INFO MainThread:2559923 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 23:28:49,339 INFO MainThread:2559923 [wandb_run.py:_restore():1752] restore +2022-07-29 23:28:51,681 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:28:52,054 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:28:52,612 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 372393 +} + +2022-07-29 23:28:52,715 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 372393 +} + +2022-07-29 23:28:52,817 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:52,919 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,021 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,123 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,224 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,326 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,428 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,530 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:54,056 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:54,422 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} +local_info { +} + +2022-07-29 23:28:56,009 INFO MainThread:2559923 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb b/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cfe62d5f00c04d5c6cdf40ed10b4bb46f6b01776 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f7c5ac63cb533b27df989c5265415ce13e339d5f8c056e1f6c9a0fc3902ba3e4 +size 424833 diff --git a/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to `max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'validation'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_074919-12xoayks/files/config.yaml b/wandb/run-20220730_074919-12xoayks/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..795906cc3387404bc5273c1d23fc6ac7f3018d93 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659167359 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_074919-12xoayks/files/diff.patch b/wandb/run-20220730_074919-12xoayks/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/output.log b/wandb/run-20220730_074919-12xoayks/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1f32bffe1ae0a93a1b1289f1eea0fe5071c9d9fd --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/output.log @@ -0,0 +1,1578 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_07-49-15_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.71it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 439.12it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7328/9523 [00:00<00:00, 6747.47ex/s] +removing punctuation from train split #1: 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7280/9523 [00:00<00:00, 8744.68ex/s] +removing punctuation from train split #2: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7416/9523 [00:00<00:00, 9086.47ex/s] +removing punctuation from train split #3: 68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 6487/9523 [00:00<00:00, 9377.94ex/s] +removing punctuation from train split #4: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6434/9523 [00:00<00:00, 9148.65ex/s] +removing punctuation from train split #5: 27%|█████████████████████████████████████████████████████▌ | 2592/9523 [00:00<00:00, 8716.13ex/s] +removing punctuation from train split #6: 28%|██████████████████████████████████████████████████████▋ | 2641/9523 [00:00<00:00, 8868.89ex/s] 
+removing punctuation from train split #7: 27%|████████████████████████████████████████████████████▉ | 2557/9523 [00:00<00:00, 8604.97ex/s] +removing punctuation from train split #8: 18%|███████████████████████████████████▍ | 1716/9523 [00:00<00:00, 8626.61ex/s] +removing punctuation from train split #9: 17%|█████████████████████████████████▏ | 1606/9523 [00:00<00:00, 8123.25ex/s] +removing punctuation from train split #10: 9%|█████████████████▏ | 828/9523 [00:00<00:01, 8276.53ex/s] +removing punctuation from train split #11: 8%|████████████████▎ | 788/9523 [00:00<00:01, 7870.27ex/s] +removing punctuation from train split #12: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:01:17.653063: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:01:17.653114: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 32 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 256 +INFO:__main__: Total optimization steps = 47280 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 22.29G of 15.48G hbm. Exceeded hbm capacity by 6.81G. +Total hbm usage >= 22.81G: + reserved 530.00M + program 11.45G + arguments 10.84G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 11.45G: + global 244.0K + scoped 72.08M + HLO temp 11.38G (99.3% utilization: Unpadded (10.50G) Padded (10.58G), 7.1% fragmentation (826.39M)) + Largest program allocations in hbm: + 1. Size: 1.95G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[32,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.90G + Extra memory due to padding: 50.78M (1.0x expansion) + XLA label: fusion.180.remat6 = fusion(bitcast.7587, bitcast.7585, fusion.14562), kind=kOutput, calls=fused_computation.176.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,5120]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.1570.remat = fusion(fusion.6368, get-tuple-element.21296, bitcast.11253), kind=kOutput, calls=fused_computation.1410.clone + Allocation type: HLO temp + ========================== + 3. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[32,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 312.19M + XLA label: fusion.24115 = fusion(fusion.1570.remat, get-tuple-element.21271, get-tuple-element.21270, get-tuple-element.21305, ...(+1)), kind=kOutput, calls=fused_computation.18872 + Allocation type: HLO temp + ========================== + 4. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.7152 = fusion(get-tuple-element.9850, get-tuple-element.20844, get-tuple-element.10084, get-tuple-element.20843, ...(+3)), kind=kLoop, calls=fused_computation.6596 + Allocation type: HLO temp + ========================== + 5. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/15/15/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20091 = fusion(fusion.6802.remat, get-tuple-element.13360, fusion.1630, bitcast.11123), kind=kOutput, calls=fused_computation.18657 + Allocation type: HLO temp + ========================== + 6. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20055 = fusion(fusion.6766.remat, get-tuple-element.13680, fusion.1612, bitcast.11087), kind=kOutput, calls=fused_computation.18621 + Allocation type: HLO temp + ========================== + 7. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.6654.remat2 = fusion(get-tuple-element.21303, copy.14050.remat2, get-tuple-element.21290, bitcast.11252), kind=kOutput, calls=fused_computation.6098.clone.clone + Allocation type: HLO temp + ========================== + 8. 
Size: 156.09M + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: copy.16362 = copy(bitcast.14787) + Allocation type: HLO temp + ========================== + 9. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20123 = fusion(fusion.6836, copy.16362, get-tuple-element.14006, fusion.8933, ...(+1)), kind=kLoop, calls=fused_computation.18689 + Allocation type: HLO temp + ========================== + 10. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/32/32/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20057 = fusion(fusion.6768.remat, get-tuple-element.13664, fusion.1613, bitcast.11089), kind=kOutput, calls=fused_computation.18623 + Allocation type: HLO temp + ========================== + 11. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20121 = fusion(get-tuple-element.11240, get-tuple-element.13248, fusion.1645, bitcast.11153), kind=kOutput, calls=fused_computation.18687 + Allocation type: HLO temp + ========================== + 12. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20119 = fusion(get-tuple-element.11244, get-tuple-element.13264, fusion.1644, bitcast.11151), kind=kOutput, calls=fused_computation.18685 + Allocation type: HLO temp + ========================== + 13. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20117 = fusion(get-tuple-element.11248, get-tuple-element.13440, fusion.1643, bitcast.11149), kind=kOutput, calls=fused_computation.18683 + Allocation type: HLO temp + ========================== + 14. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20115 = fusion(get-tuple-element.11252, get-tuple-element.13616, fusion.1642, bitcast.11147), kind=kOutput, calls=fused_computation.18681 + Allocation type: HLO temp + ========================== + 15. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20113 = fusion(get-tuple-element.11256, get-tuple-element.13792, fusion.1641, bitcast.11145), kind=kOutput, calls=fused_computation.18679 + Allocation type: HLO temp + ========================== + 16. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20111 = fusion(get-tuple-element.11260, get-tuple-element.13936, fusion.1640, bitcast.11143), kind=kOutput, calls=fused_computation.18677 + Allocation type: HLO temp + ========================== + 17. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20109 = fusion(get-tuple-element.11264, get-tuple-element.13952, fusion.1639, bitcast.11141), kind=kOutput, calls=fused_computation.18675 + Allocation type: HLO temp + ========================== + 18. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20107 = fusion(get-tuple-element.11268, get-tuple-element.13968, fusion.1638, bitcast.11139), kind=kOutput, calls=fused_computation.18673 + Allocation type: HLO temp + ========================== + 19. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20105 = fusion(get-tuple-element.11272, get-tuple-element.13984, fusion.1637, bitcast.11137), kind=kOutput, calls=fused_computation.18671 + Allocation type: HLO temp + ========================== + 20. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20103 = fusion(get-tuple-element.11276, get-tuple-element.14000, fusion.1636, bitcast.11135), kind=kOutput, calls=fused_computation.18669 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. 
+-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 22.29G of 15.48G hbm. Exceeded hbm capacity by 6.81G. +Total hbm usage >= 22.81G: + reserved 530.00M + program 11.45G + arguments 10.84G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 11.45G: + global 244.0K + scoped 72.08M + HLO temp 11.38G (99.3% utilization: Unpadded (10.50G) Padded (10.58G), 7.1% fragmentation (826.39M)) + Largest program allocations in hbm: + 1. Size: 1.95G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[32,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.90G + Extra memory due to padding: 50.78M (1.0x expansion) + XLA label: fusion.180.remat6 = fusion(bitcast.7587, bitcast.7585, fusion.14562), kind=kOutput, calls=fused_computation.176.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,5120]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.1570.remat = fusion(fusion.6368, get-tuple-element.21296, bitcast.11253), kind=kOutput, calls=fused_computation.1410.clone + Allocation type: HLO temp + ========================== + 3. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[32,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 312.19M + XLA label: fusion.24115 = fusion(fusion.1570.remat, get-tuple-element.21271, get-tuple-element.21270, get-tuple-element.21305, ...(+1)), kind=kOutput, calls=fused_computation.18872 + Allocation type: HLO temp + ========================== + 4. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.7152 = fusion(get-tuple-element.9850, get-tuple-element.20844, get-tuple-element.10084, get-tuple-element.20843, ...(+3)), kind=kLoop, calls=fused_computation.6596 + Allocation type: HLO temp + ========================== + 5. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/15/15/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20091 = fusion(fusion.6802.remat, get-tuple-element.13360, fusion.1630, bitcast.11123), kind=kOutput, calls=fused_computation.18657 + Allocation type: HLO temp + ========================== + 6. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20055 = fusion(fusion.6766.remat, get-tuple-element.13680, fusion.1612, bitcast.11087), kind=kOutput, calls=fused_computation.18621 + Allocation type: HLO temp + ========================== + 7. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.6654.remat2 = fusion(get-tuple-element.21303, copy.14050.remat2, get-tuple-element.21290, bitcast.11252), kind=kOutput, calls=fused_computation.6098.clone.clone + Allocation type: HLO temp + ========================== + 8. 
Size: 156.09M + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: copy.16362 = copy(bitcast.14787) + Allocation type: HLO temp + ========================== + 9. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20123 = fusion(fusion.6836, copy.16362, get-tuple-element.14006, fusion.8933, ...(+1)), kind=kLoop, calls=fused_computation.18689 + Allocation type: HLO temp + ========================== + 10. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/32/32/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20057 = fusion(fusion.6768.remat, get-tuple-element.13664, fusion.1613, bitcast.11089), kind=kOutput, calls=fused_computation.18623 + Allocation type: HLO temp + ========================== + 11. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20121 = fusion(get-tuple-element.11240, get-tuple-element.13248, fusion.1645, bitcast.11153), kind=kOutput, calls=fused_computation.18687 + Allocation type: HLO temp + ========================== + 12. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20119 = fusion(get-tuple-element.11244, get-tuple-element.13264, fusion.1644, bitcast.11151), kind=kOutput, calls=fused_computation.18685 + Allocation type: HLO temp + ========================== + 13. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20117 = fusion(get-tuple-element.11248, get-tuple-element.13440, fusion.1643, bitcast.11149), kind=kOutput, calls=fused_computation.18683 + Allocation type: HLO temp + ========================== + 14. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20115 = fusion(get-tuple-element.11252, get-tuple-element.13616, fusion.1642, bitcast.11147), kind=kOutput, calls=fused_computation.18681 + Allocation type: HLO temp + ========================== + 15. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20113 = fusion(get-tuple-element.11256, get-tuple-element.13792, fusion.1641, bitcast.11145), kind=kOutput, calls=fused_computation.18679 + Allocation type: HLO temp + ========================== + 16. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20111 = fusion(get-tuple-element.11260, get-tuple-element.13936, fusion.1640, bitcast.11143), kind=kOutput, calls=fused_computation.18677 + Allocation type: HLO temp + ========================== + 17. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20109 = fusion(get-tuple-element.11264, get-tuple-element.13952, fusion.1639, bitcast.11141), kind=kOutput, calls=fused_computation.18675 + Allocation type: HLO temp + ========================== + 18. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20107 = fusion(get-tuple-element.11268, get-tuple-element.13968, fusion.1638, bitcast.11139), kind=kOutput, calls=fused_computation.18673 + Allocation type: HLO temp + ========================== + 19. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20105 = fusion(get-tuple-element.11272, get-tuple-element.13984, fusion.1637, bitcast.11137), kind=kOutput, calls=fused_computation.18671 + Allocation type: HLO temp + ========================== + 20. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20103 = fusion(get-tuple-element.11276, get-tuple-element.14000, fusion.1636, bitcast.11135), kind=kOutput, calls=fused_computation.18669 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/requirements.txt b/wandb/run-20220730_074919-12xoayks/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 
+importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No 
newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json b/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..41ff4dfd696b810ef0e4d314ba8b17d3b68d086c --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T07:49:23.141477", + "startedAt": "2022-07-30T07:49:19.732765", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": 
"https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json b/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7e27891b1ad34db9f1f3ef6a20af17baa76819 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 994}} \ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log b/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fbaf5f4190d3a5ccbade8b3a0a946d6eb296b5d8 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log @@ -0,0 +1,526 @@ +2022-07-30 07:49:20,650 INFO MainThread:2049141 [internal.py:wandb_internal():87] W&B internal server running at pid: 2049141, started at: 2022-07-30 07:49:20.650404 +2022-07-30 07:49:20,652 INFO WriterThread:2049141 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb +2022-07-30 07:49:20,652 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 07:49:20,653 DEBUG SenderThread:2049141 [sender.py:send():234] send: header +2022-07-30 07:49:20,653 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: check_version +2022-07-30 07:49:20,693 DEBUG SenderThread:2049141 [sender.py:send():234] send: run +2022-07-30 07:49:20,858 INFO SenderThread:2049141 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files +2022-07-30 07:49:20,858 INFO SenderThread:2049141 
[sender.py:_start_run_threads():804] run started: 12xoayks with start time 1659167359 +2022-07-30 07:49:20,858 DEBUG SenderThread:2049141 [sender.py:send():234] send: summary +2022-07-30 07:49:20,859 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 07:49:20,859 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 07:49:21,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:__init__():40] meta init +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:__init__():54] meta init done +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:probe():214] probe +2022-07-30 07:49:23,142 DEBUG HandlerThread:2049141 [meta.py:_setup_git():204] setup git +2022-07-30 07:49:23,180 DEBUG HandlerThread:2049141 [meta.py:_setup_git():211] setup git done +2022-07-30 07:49:23,180 DEBUG HandlerThread:2049141 [meta.py:_save_code():92] save code +2022-07-30 07:49:23,193 DEBUG HandlerThread:2049141 [meta.py:_save_code():113] save code done +2022-07-30 07:49:23,193 DEBUG HandlerThread:2049141 [meta.py:_save_patches():130] save patches +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_patches():172] save patches done +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_pip():58] save pip +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_pip():72] save pip done +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:probe():252] probe done +2022-07-30 07:49:23,287 DEBUG SenderThread:2049141 [sender.py:send():234] send: files +2022-07-30 07:49:23,287 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 07:49:23,288 INFO SenderThread:2049141 [sender.py:_save_file():939] 
saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 07:49:23,288 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 07:49:23,294 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:23,297 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:23,768 INFO Thread-11 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/c7b0qlvh-wandb-metadata.json +2022-07-30 07:49:23,776 INFO Thread-13 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/1z8w735k-diff.patch +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/diff.patch +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code +2022-07-30 07:49:23,979 INFO Thread-12 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/3m9yt5mk-code/run_flax_speech_recognition_ctc.py 
+2022-07-30 07:49:25,864 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:27,864 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:29,865 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:31,866 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:37,869 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:38,432 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:38,432 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:39,870 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:51,223 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:49:51,875 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:53,571 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:53,571 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:53,876 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:03,880 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:05,881 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:07,882 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:08,710 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:08,710 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:21,295 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:50:22,889 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:23,847 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:23,848 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:39,000 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:39,000 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:51,366 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:50:54,167 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:54,168 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:05,909 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:07,909 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:09,354 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:09,354 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:09,910 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:11,911 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:13,912 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:15,913 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:17,914 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:19,915 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:21,435 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:51:21,916 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:23,917 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:24,523 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:24,523 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:26,918 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:28,919 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:30,920 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:32,921 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:34,922 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:36,923 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:38,924 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:39,670 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:39,671 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:40,929 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:42,930 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:44,931 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:46,932 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:48,932 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:50,934 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:51,510 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:51:52,938 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:54,842 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:54,843 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:54,939 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:56,940 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:58,941 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:00,942 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:02,943 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:04,944 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:06,945 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:08,946 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:09,982 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:09,983 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:10,947 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:12,948 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:14,949 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:16,950 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:18,952 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:20,953 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:21,591 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:52:22,954 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:24,955 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:25,125 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:25,125 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:26,956 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:28,957 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:30,957 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:32,959 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:34,960 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:36,961 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:38,962 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:40,264 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:40,265 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:40,963 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:42,964 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:44,965 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:46,968 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:48,970 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:50,969 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:51,681 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:52:52,970 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:54,972 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:55,405 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:55,405 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:56,973 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:58,974 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:00,975 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:02,976 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:04,977 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:06,978 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:08,979 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:10,564 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:10,564 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:10,980 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:12,981 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:14,982 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:16,984 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:18,984 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:20,985 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:21,760 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:53:22,986 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:24,987 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:25,711 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:25,711 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:26,988 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:28,990 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:30,991 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:32,992 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:34,993 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:36,994 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:38,995 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:40,847 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:40,848 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:40,996 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:42,997 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:44,998 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:46,999 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:49,000 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:51,001 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:51,846 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:53:53,003 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:55,003 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:55,989 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:55,989 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:57,004 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:59,005 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:01,006 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:03,007 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:05,008 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:07,009 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:09,010 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:11,011 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:11,149 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:11,149 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:13,012 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:15,013 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:17,014 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:19,015 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:21,016 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:21,922 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:54:23,017 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:25,018 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:26,293 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:26,294 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:27,019 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:29,020 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:31,021 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:41,436 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:41,436 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:52,000 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:54:56,579 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:56,580 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:11,718 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:11,718 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:22,079 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:55:26,855 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 07:55:26,855 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:41,992 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:41,993 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:52,160 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:55:57,152 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:57,153 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:05,066 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:07,068 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:09,069 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:11,070 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:12,295 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:12,295 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:13,072 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:15,073 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:17,074 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:19,076 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:21,077 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:22,240 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:56:23,078 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:25,079 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:27,081 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:29,082 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:29,109 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:29,110 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:31,083 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:33,084 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:35,085 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:38,087 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:40,088 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:44,258 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:44,261 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:52,311 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:56:59,398 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:59,399 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:04,098 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:14,102 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:14,536 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:14,537 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:16,103 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:18,104 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:20,105 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:22,106 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:22,386 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:57:24,107 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:26,108 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:28,109 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:29,675 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:29,675 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:30,110 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:32,111 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:34,112 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:36,113 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:38,114 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:40,115 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:42,116 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:44,117 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:44,903 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:44,903 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:48,119 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:50,120 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:52,121 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:52,465 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:57:54,123 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:56,124 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:00,053 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:00,054 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:15,191 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:15,192 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:22,531 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:58:26,137 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:30,422 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:30,422 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:36,141 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:39,143 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:41,144 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:43,145 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:45,146 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:45,614 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:45,614 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:47,147 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:49,148 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:51,149 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:52,600 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:58:53,150 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:55,151 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:57,152 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:59,153 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:00,788 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:00,789 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:01,154 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:03,155 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:05,156 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:07,157 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:11,159 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:13,160 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:15,161 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:15,931 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:15,932 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:17,162 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:19,163 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:21,164 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:22,675 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:59:23,166 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:25,167 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:27,168 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:29,170 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:31,112 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:31,112 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:31,170 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:33,171 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:35,172 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:37,173 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:39,175 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:41,176 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:43,177 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:45,178 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:46,268 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:46,268 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:47,179 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:49,180 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:51,181 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:52,761 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:59:53,182 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:55,183 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:57,184 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:59,185 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:01,428 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:01,428 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:16,561 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:16,561 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:22,834 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:00:31,695 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:31,696 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:43,203 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:46,887 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:46,887 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:50,206 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:52,914 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:00:56,209 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:02,081 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 08:01:02,082 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:04,212 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:10,215 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:17,218 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:17,218 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:17,219 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:19,219 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:22,990 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:01:23,220 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:27,222 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:29,223 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:32,360 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:32,360 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:35,226 INFO 
Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:47,601 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:47,602 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:53,067 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:02:02,879 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:02,880 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:18,049 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:18,049 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:20,243 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:23,142 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:02:28,245 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:30,246 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:33,227 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:33,227 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:48,394 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:48,394 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: 
stop_status +2022-07-30 08:02:53,216 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:03:03,546 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:03,546 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:18,684 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:18,685 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:23,296 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:03:33,823 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:33,824 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:48,968 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:48,968 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:53,372 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:04:04,142 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:04,143 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:19,278 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:19,278 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:23,452 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:04:34,413 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:34,413 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:49,554 DEBUG HandlerThread:2049141 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:49,554 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:53,529 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:04,700 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:04,701 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:19,843 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:19,843 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:23,606 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:34,979 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:34,979 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:50,116 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:50,117 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:53,680 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:55,098 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:55,098 DEBUG SenderThread:2049141 [sender.py:send():234] send: telemetry +2022-07-30 08:05:55,099 DEBUG SenderThread:2049141 [sender.py:send():234] send: exit +2022-07-30 08:05:55,099 INFO SenderThread:2049141 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:05:55,101 INFO SenderThread:2049141 [sender.py:send_exit():368] handling runtime: 994 +2022-07-30 08:05:55,102 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:05:55,102 INFO SenderThread:2049141 
[sender.py:send_exit():374] send defer +2022-07-30 08:05:55,102 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:55,103 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,103 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:05:55,103 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,103 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:05:55,103 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 1 +2022-07-30 08:05:55,104 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,104 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:05:55,183 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,183 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:05:55,184 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:05:55,184 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:55,184 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,184 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:05:55,185 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,185 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:05:55,185 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:05:55,185 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,185 INFO HandlerThread:2049141 
[handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:05:55,185 DEBUG SenderThread:2049141 [sender.py:send():234] send: summary +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:05:55,186 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:05:55,186 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,186 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:05:55,186 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,187 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:05:55,205 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:55,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 08:05:55,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:05:55,431 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:05:55,431 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:55,432 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,432 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:05:55,432 DEBUG SenderThread:2049141 
[sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,432 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:05:55,432 INFO SenderThread:2049141 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:05:55,533 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml +2022-07-30 08:05:56,327 INFO SenderThread:2049141 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml config.yaml +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/diff.patch diff.patch +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt requirements.txt +2022-07-30 08:05:56,332 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log output.log +2022-07-30 08:05:56,332 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json wandb-summary.json +2022-07-30 08:05:56,335 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:05:56,338 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:05:56,338 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:05:56,338 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,343 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:56,343 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:05:56,343 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:56,344 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:05:56,344 INFO SenderThread:2049141 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:05:56,444 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,444 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,546 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,546 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,647 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,648 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,749 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,749 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,832 INFO Thread-15 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt +2022-07-30 08:05:56,836 INFO Thread-14 :2049141 
[upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml +2022-07-30 08:05:56,851 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,851 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,856 INFO Thread-17 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 08:05:56,953 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,953 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,037 INFO Thread-16 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:05:57,054 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,055 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,157 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,157 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,237 INFO Thread-7 :2049141 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:05:57,238 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,238 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:05:57,238 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,238 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:05:57,258 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 
08:05:57,799 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:05:57,800 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,800 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,800 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:05:57,801 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,801 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:05:57,801 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:05:57,802 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,802 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send():234] send: final +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send():234] send: footer +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,802 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:05:57,901 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,901 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,902 INFO SenderThread:2049141 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:05:58,160 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:05:58,161 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:05:58,161 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
shutdown +2022-07-30 08:05:58,162 INFO HandlerThread:2049141 [handler.py:finish():731] shutting down handler +2022-07-30 08:05:58,802 INFO WriterThread:2049141 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [sender.py:finish():1070] shutting down sender +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:05:59,163 INFO MainThread:2049141 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_074919-12xoayks/logs/debug.log b/wandb/run-20220730_074919-12xoayks/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5b10edc5df5d44047496bbeef6e42098e1ff9808 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/logs/debug.log +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():404] calling init triggers +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():460] starting backend +2022-07-30 07:49:19,734 INFO MainThread:2047809 
[backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 07:49:19,793 INFO MainThread:2047809 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 07:49:19,838 INFO MainThread:2047809 [backend.py:ensure_launched():221] started backend process with pid: 2049141 +2022-07-30 07:49:19,840 INFO MainThread:2047809 [wandb_init.py:init():469] backend started and connected +2022-07-30 07:49:19,854 INFO MainThread:2047809 [wandb_init.py:init():533] updated telemetry +2022-07-30 07:49:19,967 INFO MainThread:2047809 [wandb_init.py:init():563] communicating current version +2022-07-30 07:49:20,691 INFO MainThread:2047809 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 07:49:20,692 INFO MainThread:2047809 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 07:49:20,859 INFO MainThread:2047809 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 07:49:23,291 INFO MainThread:2047809 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 07:49:23,292 INFO MainThread:2047809 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 07:49:23,292 INFO MainThread:2047809 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 07:49:23,295 INFO MainThread:2047809 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 07:49:23,295 INFO MainThread:2047809 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:05:52,577 INFO MainThread:2047809 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:05:52,582 INFO MainThread:2047809 [wandb_run.py:_restore():1752] restore +2022-07-30 08:05:55,103 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:05:55,432 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:05:56,343 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 386444 +} + +2022-07-30 08:05:56,445 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 386444 +} + +2022-07-30 08:05:56,547 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,648 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,750 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,852 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,954 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,056 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,158 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,800 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:58,159 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} +local_info { +} + +2022-07-30 08:05:59,740 INFO MainThread:2047809 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb b/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c04aea630b2c69458564bd66ae80c86cd29a6a18 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a022e831647d19c663a7f103a01c4beec1d2719db4226c57beea36e1706c209e +size 409024 diff --git a/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_081215-1j14rrmn/files/config.yaml b/wandb/run-20220730_081215-1j14rrmn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ae020ab8e073f88cec1f213742de06f10904bb2 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659168735 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_081215-1j14rrmn/files/diff.patch b/wandb/run-20220730_081215-1j14rrmn/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/output.log b/wandb/run-20220730_081215-1j14rrmn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..6e45f5eac8626ff8ac6383af49a5c087f5ce3f26 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/output.log @@ -0,0 +1,1569 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_08-12-12_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 77.69it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 446.17it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'codevectors')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9143.24ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9051.11ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8869.01ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8515.83ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8734.38ex/s] +removing punctuation 
from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8108.33ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8402.66ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8749.77ex/s] +removing punctuation from train split #4: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9005/9523 [00:01<00:00, 7703.37ex/s] +removing punctuation from train split #6: 76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7204/9523 [00:00<00:00, 7350.85ex/s] +removing punctuation from train split #8: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7342/9523 [00:00<00:00, 8981.46ex/s] +removing punctuation from train split #6: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8066/9523 [00:01<00:00, 7711.07ex/s] +removing punctuation from train split #6: 
94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8933/9523 [00:01<00:00, 7986.69ex/s] +removing punctuation from train split #7: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8972/9523 [00:01<00:00, 7819.51ex/s] +removing punctuation from train split #8: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9104/9523 [00:01<00:00, 8332.99ex/s] +removing punctuation from train split #9: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8148/9523 [00:00<00:00, 9153.93ex/s] +removing punctuation from train split #10: 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6056/9523 [00:00<00:00, 8551.74ex/s] +removing punctuation from train split #9: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9070/9523 [00:01<00:00, 9172.58ex/s] +removing punctuation from train split #12: 35%|████████████████████████████████████████████████████████████████████▍ | 3326/9522 [00:00<00:00, 8433.42ex/s] +removing punctuation from train split #13: 35%|████████████████████████████████████████████████████████████████████ | 3304/9522 [00:00<00:00, 8383.23ex/s] +removing punctuation from train split #12: 44%|██████████████████████████████████████████████████████████████████████████████████████▏ | 4190/9522 [00:00<00:00, 
8504.65ex/s] +removing punctuation from train split #15: 34%|███████████████████████████████████████████████████████████████████▍ | 3277/9522 [00:00<00:00, 8313.68ex/s] +removing punctuation from train split #16: 26%|██████████████████████████████████████████████████▌ | 2455/9522 [00:00<00:00, 8247.38ex/s] +removing punctuation from train split #17: 16%|████████████████████████████████▎ | 1567/9522 [00:00<00:01, 7901.22ex/s] +removing punctuation from train split #18: 8%|████████████████▏ | 781/9522 [00:00<00:01, 7806.36ex/s] +removing punctuation from train split #17: 25%|█████████████████████████████████████████████████▍ | 2402/9522 [00:00<00:00, 8104.35ex/s] +removing punctuation from train split #18: 17%|█████████████████████████████████▌ | 1632/9522 [00:00<00:00, 8216.82ex/s] +removing punctuation from train split #25: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8188.32ex/s] +removing punctuation from train split #28: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8679.44ex/s] +removing punctuation from train split #29: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8830.52ex/s] +removing punctuation from train split #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8859.71ex/s] +removing punctuation from train split #30: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8310.85ex/s] +removing punctuation from train split #16: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9404/9522 [00:01<00:00, 8678.97ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7777/9522 [00:00<00:00, 8162.33ex/s] +removing punctuation from train split #17: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9386/9522 [00:01<00:00, 8094.38ex/s] +removing punctuation from train split #18: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8671/9522 [00:01<00:00, 8389.25ex/s] +removing punctuation from train split #19: 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8322/9522 [00:01<00:00, 8707.34ex/s] +removing punctuation from train split #20: 78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7386/9522 [00:00<00:00, 7844.57ex/s] +removing punctuation from train split #19: 
97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9193/9522 [00:01<00:00, 7908.47ex/s] +removing punctuation from train split #21: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7626/9522 [00:00<00:00, 8588.83ex/s] +removing punctuation from train split #20: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9141/9522 [00:01<00:00, 8316.59ex/s] +removing punctuation from train split #22: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7706/9522 [00:00<00:00, 8578.86ex/s] +removing punctuation from train split #21: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9372/9522 [00:01<00:00, 8329.46ex/s] +removing punctuation from train split #22: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8564/9522 [00:01<00:00, 8072.87ex/s] +removing punctuation from train split #22: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9438/9522 [00:01<00:00, 8267.35ex/s] +removing punctuation from train split #23: 
80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7614/9522 [00:00<00:00, 7793.87ex/s] +removing punctuation from train split #23: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9432/9522 [00:01<00:00, 8451.31ex/s] +removing punctuation from train split #24: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8036/9522 [00:01<00:00, 8343.29ex/s] +removing punctuation from train split #24: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8961/9522 [00:01<00:00, 8612.13ex/s] +removing punctuation from train split #25: 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8314/9522 [00:01<00:00, 9221.98ex/s] +removing punctuation from train split #26: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8789/9522 [00:01<00:00, 8678.32ex/s] +removing punctuation from train split #25: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9305/9522 [00:01<00:00, 9430.23ex/s] +removing punctuation from train split #28: 
82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7820/9522 [00:00<00:00, 8492.68ex/s] +removing punctuation from train split #28: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8773/9522 [00:01<00:00, 8799.55ex/s] +removing punctuation from train split #30: 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5956/9522 [00:00<00:00, 8623.44ex/s] +removing punctuation from train split #30: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6830/9522 [00:00<00:00, 8251.44ex/s] +removing punctuation from train split #31: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7145/9522 [00:00<00:00, 9221.92ex/s] +removing punctuation from train split #29: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8016/9522 [00:00<00:00, 8796.98ex/s] +removing punctuation from train split #29: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8982/9522 [00:01<00:00, 9053.42ex/s] +removing punctuation from train split #30: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7784/9522 [00:00<00:00, 8632.40ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow8718/9522 [00:01<00:00, 8843.18ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached 
processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed 
dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|▉ | 41/9497 [00:01<03:33, 44.27ex/s] +preprocess dataset #1: 0%|▌ | 25/9497 [00:01<04:40, 33.76ex/s] +preprocess dataset #2: 1%|█▋ | 72/9497 [00:02<03:41, 42.52ex/s] +preprocess dataset #3: 0%|▉ | 38/9497 [00:01<04:16, 36.94ex/s] +preprocess dataset #4: 0%|▏ | 6/9497 [00:00<18:47, 8.42ex/s] +preprocess dataset #5: 1%|█▋ | 74/9497 [00:02<03:17, 47.62ex/s] +preprocess dataset #6: 0%|▌ | 22/9497 [00:01<06:20, 24.88ex/s] +preprocess dataset #7: 1%|██ | 89/9497 [00:02<03:26, 45.56ex/s] +preprocess dataset #8: 0%|▋ | 31/9497 [00:01<04:49, 32.66ex/s] +preprocess dataset #9: 0%| | 1/9497 [00:00<2:13:35, 1.18ex/s] +preprocess dataset #10: 0%|█ | 45/9497 [00:01<04:03, 38.89ex/s] +preprocess dataset #11: 1%|██▏ | 97/9496 [00:03<03:44, 41.87ex/s] +preprocess dataset #12: 1%|█▏ | 52/9496 [00:01<03:53, 40.40ex/s] +preprocess dataset #13: 0%|▎ | 16/9496 [00:01<07:08, 22.12ex/s] +preprocess dataset #14: 1%|█▍ | 64/9496 [00:02<04:40, 33.58ex/s] +preprocess dataset #15: 0%|▌ | 23/9496 [00:01<05:42, 27.64ex/s] +preprocess dataset #16: 1%|█▉ | 83/9496 
[00:02<04:10, 37.51ex/s] +preprocess dataset #17: 0%|▊ | 37/9496 [00:01<04:49, 32.68ex/s] +preprocess dataset #18: 1%|█▎ | 59/9496 [00:02<04:30, 34.90ex/s] +preprocess dataset #19: 1%|██▏ | 96/9496 [00:03<04:49, 32.48ex/s] +preprocess dataset #20: 0%|▊ | 33/9496 [00:01<05:26, 28.94ex/s] +preprocess dataset #21: 0%|▏ | 8/9496 [00:01<13:50, 11.42ex/s] +preprocess dataset #22: 1%|█▋ | 73/9496 [00:03<04:56, 31.74ex/s] +preprocess dataset #23: 0%|▌ | 26/9496 [00:01<06:14, 25.28ex/s] +preprocess dataset #24: 1%|█▌ | 70/9496 [00:03<04:36, 34.10ex/s] +preprocess dataset #25: 0%|▌ | 25/9496 [00:01<06:52, 22.97ex/s] +preprocess dataset #25: 1%|█▋ | 71/9496 [00:03<06:22, 24.66ex/s] +preprocess dataset #26: 1%|█▏ | 49/9496 [00:02<06:26, 24.43ex/s] +preprocess dataset #27: 0%|▍ | 21/9496 [00:01<07:30, 21.03ex/s] +preprocess dataset #27: 1%|█▌ | 68/9496 [00:03<06:41, 23.49ex/s] +preprocess dataset #28: 0%|▉ | 38/9496 [00:02<06:23, 24.63ex/s] +preprocess dataset #29: 1%|█▎ | 56/9496 [00:02<05:18, 29.62ex/s] +preprocess dataset #30: 0%|▋ | 31/9496 [00:01<05:50, 26.99ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #20: 44%|███████████████████████████████████████████████████████████████████████████████████████████████▉ | 4198/9496 [02:17<02:30, 35.14ex/s] +preprocess dataset #21: 44%|██████████████████████████████████████████████████████████████████████████████████████████████▋ | 4142/9496 [02:16<02:32, 35.17ex/s] +preprocess dataset #22: 43%|██████████████████████████████████████████████████████████████████████████████████████████████ | 4114/9496 [02:16<02:35, 34.60ex/s] +preprocess dataset #23: 43%|█████████████████████████████████████████████████████████████████████████████████████████████ | 4071/9496 [02:14<02:43, 33.23ex/s] +preprocess dataset #24: 43%|████████████████████████████████████████████████████████████████████████████████████████████▎ | 4038/9496 [02:13<03:02, 
29.89ex/s] +preprocess dataset #25: 42%|███████████████████████████████████████████████████████████████████████████████████████████▏ | 3991/9496 [02:12<02:29, 36.82ex/s] +preprocess dataset #26: 42%|██████████████████████████████████████████████████████████████████████████████████████████▍ | 3960/9496 [02:11<02:46, 33.29ex/s] +preprocess dataset #27: 42%|███████████████████████████████████████████████████████████████████████████████████████████ | 3983/9496 [02:10<02:47, 32.97ex/s] +preprocess dataset #28: 41%|██████████████████████████████████████████████████████████████████████████████████████████ | 3939/9496 [02:09<02:46, 33.36ex/s] +preprocess dataset #29: 42%|██████████████████████████████████████████████████████████████████████████████████████████▎ | 3954/9496 [02:07<02:52, 32.15ex/s] +preprocess dataset #30: 41%|████████████████████████████████████████████████████████████████████████████████████████▌ | 3876/9496 [02:06<02:46, 33.71ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #4: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8501/9497 [04:33<00:30, 32.43ex/s] +preprocess dataset #5: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8402/9497 [04:32<00:30, 36.28ex/s] +preprocess dataset #6: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8354/9497 [04:31<00:35, 32.64ex/s] +preprocess dataset #7: 
88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8331/9497 [04:30<00:34, 33.47ex/s] +preprocess dataset #8: 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7920/9497 [04:29<00:43, 36.43ex/s] +preprocess dataset #9: 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7996/9497 [04:28<00:39, 38.44ex/s] +preprocess dataset #10: 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8231/9497 [04:28<00:36, 34.90ex/s] +preprocess dataset #11: 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8129/9496 [04:27<00:35, 38.42ex/s] +preprocess dataset #12: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7916/9496 [04:26<00:48, 32.88ex/s] +preprocess dataset #13: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8159/9496 [04:25<00:44, 29.78ex/s] +preprocess dataset #14: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7900/9496 [04:24<00:47, 33.59ex/s] 
+preprocess dataset #15: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7963/9496 [04:23<00:38, 40.25ex/s] +preprocess dataset #16: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7958/9496 [04:22<00:45, 33.60ex/s] +preprocess dataset #17: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7944/9496 [04:21<00:53, 28.81ex/s] +preprocess dataset #18: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7997/9496 [04:20<00:39, 37.64ex/s] +preprocess dataset #19: 85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8078/9496 [04:19<00:43, 32.36ex/s] +preprocess dataset #20: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7890/9496 [04:17<00:50, 31.53ex/s] +preprocess dataset #21: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7804/9496 [04:17<00:51, 32.97ex/s] +preprocess dataset #22: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7803/9496 [04:16<00:48, 35.08ex/s] 
+preprocess dataset #23: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7777/9496 [04:15<00:51, 33.43ex/s] +preprocess dataset #24: 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7671/9496 [04:14<00:59, 30.73ex/s] +preprocess dataset #25: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7644/9496 [04:13<00:57, 32.44ex/s] +preprocess dataset #26: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7618/9496 [04:11<00:58, 32.18ex/s] +preprocess dataset #27: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7732/9496 [04:10<00:54, 32.37ex/s] +preprocess dataset #28: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7630/9496 [04:09<01:03, 29.32ex/s] +preprocess dataset #29: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7634/9496 [04:08<00:58, 31.57ex/s] +preprocess dataset #30: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7571/9496 [04:07<01:00, 31.79ex/s] + + + + + + + + + + + + + + + +preprocess dataset #29: 
91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8616/9496 [04:40<00:24, 35.53ex/s] +preprocess dataset #30: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8542/9496 [04:39<00:26, 35.90ex/s] +preprocess dataset #29: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8676/9496 [04:42<00:27, 30.04ex/s] +preprocess dataset #30: 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8599/9496 [04:41<00:27, 33.11ex/s] +preprocess dataset #30: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8661/9496 [04:43<00:33, 24.93ex/s] +preprocess dataset #29: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8814/9496 [04:46<00:17, 39.21ex/s] +preprocess dataset #30: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8730/9496 [04:45<00:21, 36.40ex/s] +preprocess dataset #29: 
94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8879/9496 [04:48<00:19, 31.25ex/s] +preprocess dataset #30: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8799/9496 [04:47<00:21, 32.97ex/s] + +preprocess dataset #30: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8931/9496 [04:51<00:16, 33.46ex/s] +preprocess dataset #29: 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9080/9496 [04:54<00:10, 39.28ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8995/9496 [04:53<00:12, 39.74ex/s] +preprocess dataset #28: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9122/9496 [04:57<00:10, 36.51ex/s] +preprocess dataset #29: 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9154/9496 [04:56<00:07, 48.51ex/s] +preprocess dataset #30: 
95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9053/9496 [04:55<00:12, 34.43ex/s] +preprocess dataset #25: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9267/9496 [05:03<00:04, 54.36ex/s] +preprocess dataset #26: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9272/9496 [05:02<00:03, 58.50ex/s] +preprocess dataset #27: 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9352/9496 [05:00<00:02, 56.13ex/s] +preprocess dataset #28: 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9238/9496 [05:00<00:04, 61.89ex/s] +preprocess dataset #29: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9264/9496 [04:58<00:03, 60.60ex/s] +preprocess dataset #30: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9166/9496 [04:57<00:05, 60.97ex/s] +preprocess dataset #30: 
98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9270/9496 [04:59<00:03, 59.51ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9441/9496 [05:03<00:00, 62.57ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9448/9496 [05:03<00:00, 58.73ex/s] +preprocess dataset #29: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9488/9496 [05:02<00:00, 59.58ex/s] +preprocess dataset #30: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9401/9496 [05:01<00:01, 57.34ex/s] +preprocess dataset #28: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9490/9496 [05:04<00:00, 55.82ex/s] +preprocess dataset #30: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9423/9496 [05:01<00:01, 65.02ex/s] +preprocess dataset #30: 
99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9446/9496 [05:02<00:00, 59.45ex/s] +preprocess dataset #30: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9490/9496 [05:02<00:00, 66.83ex/s] +preprocess dataset #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9453/9496 [05:01<00:00, 73.43ex/s] +preprocess dataset #3: 0%| | 0/1267 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:24:24.654643: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:24:24.654682: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 24 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 192 +INFO:__main__: Total optimization steps = 63040 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", 
line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 20.24G of 15.48G hbm. Exceeded hbm capacity by 4.75G. +Total hbm usage >= 20.75G: + reserved 530.00M + program 9.42G + arguments 10.82G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 9.42G: + global 196.0K + scoped 72.08M + HLO temp 9.35G (98.7% utilization: Unpadded (8.88G) Padded (9.00G), 3.7% fragmentation (354.84M)) + Largest program allocations in hbm: + 1. 
Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.24028 = fusion(fusion.14571, bitcast.7348, bitcast.7346), kind=kOutput, calls=fused_computation.18801 + Allocation type: HLO temp + ========================== + 2. Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.188.remat5 = fusion(bitcast.7348, bitcast.7346, fusion.14571), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.7200 = fusion(get-tuple-element.9849, get-tuple-element.17172, get-tuple-element.10141, get-tuple-element.17171, ...(+3)), kind=kLoop, calls=fused_computation.6644 + Allocation type: HLO temp + ========================== + 4. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/36/36/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20050 = fusion(fusion.6760.remat, get-tuple-element.13727, fusion.1609, bitcast.11036), kind=kOutput, calls=fused_computation.18616 + Allocation type: HLO temp + ========================== + 5. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13695, fusion.1611, bitcast.11040), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 6. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20040 = fusion(fusion.6750.remat, get-tuple-element.13823, fusion.1604, bitcast.11026), kind=kOutput, calls=fused_computation.18606 + Allocation type: HLO temp + ========================== + 7. 
Size: 117.07M + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: copy.17574 = copy(bitcast.14704) + Allocation type: HLO temp + ========================== + 8. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.14005, fusion.8741, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 9. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20042 = fusion(fusion.6752.remat, get-tuple-element.13807, fusion.1605, bitcast.11028), kind=kOutput, calls=fused_computation.18608 + Allocation type: HLO temp + ========================== + 10. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20122 = fusion(get-tuple-element.11239, get-tuple-element.13247, fusion.1645, bitcast.11108), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 11. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20120 = fusion(get-tuple-element.11243, get-tuple-element.13263, fusion.1644, bitcast.11106), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 12. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20118 = fusion(get-tuple-element.11247, get-tuple-element.13439, fusion.1643, bitcast.11104), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 13. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20116 = fusion(get-tuple-element.11251, get-tuple-element.13615, fusion.1642, bitcast.11102), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 14. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20114 = fusion(get-tuple-element.11255, get-tuple-element.13791, fusion.1641, bitcast.11100), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 15. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20112 = fusion(get-tuple-element.11259, get-tuple-element.13935, fusion.1640, bitcast.11098), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 16. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20110 = fusion(get-tuple-element.11263, get-tuple-element.13951, fusion.1639, bitcast.11096), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 17. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20108 = fusion(get-tuple-element.11267, get-tuple-element.13967, fusion.1638, bitcast.11094), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 18. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20106 = fusion(get-tuple-element.11271, get-tuple-element.13983, fusion.1637, bitcast.11092), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 19. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20104 = fusion(get-tuple-element.11275, get-tuple-element.13999, fusion.1636, bitcast.11090), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== + 20. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20102 = fusion(get-tuple-element.11279, get-tuple-element.13279, fusion.1635, bitcast.11088), kind=kOutput, calls=fused_computation.18668 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 20.24G of 15.48G hbm. Exceeded hbm capacity by 4.75G. +Total hbm usage >= 20.75G: + reserved 530.00M + program 9.42G + arguments 10.82G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 9.42G: + global 196.0K + scoped 72.08M + HLO temp 9.35G (98.7% utilization: Unpadded (8.88G) Padded (9.00G), 3.7% fragmentation (354.84M)) + Largest program allocations in hbm: + 1. 
Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.24028 = fusion(fusion.14571, bitcast.7348, bitcast.7346), kind=kOutput, calls=fused_computation.18801 + Allocation type: HLO temp + ========================== + 2. Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.188.remat5 = fusion(bitcast.7348, bitcast.7346, fusion.14571), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.7200 = fusion(get-tuple-element.9849, get-tuple-element.17172, get-tuple-element.10141, get-tuple-element.17171, ...(+3)), kind=kLoop, calls=fused_computation.6644 + Allocation type: HLO temp + ========================== + 4. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/36/36/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20050 = fusion(fusion.6760.remat, get-tuple-element.13727, fusion.1609, bitcast.11036), kind=kOutput, calls=fused_computation.18616 + Allocation type: HLO temp + ========================== + 5. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13695, fusion.1611, bitcast.11040), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 6. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20040 = fusion(fusion.6750.remat, get-tuple-element.13823, fusion.1604, bitcast.11026), kind=kOutput, calls=fused_computation.18606 + Allocation type: HLO temp + ========================== + 7. 
Size: 117.07M + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: copy.17574 = copy(bitcast.14704) + Allocation type: HLO temp + ========================== + 8. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.14005, fusion.8741, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 9. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20042 = fusion(fusion.6752.remat, get-tuple-element.13807, fusion.1605, bitcast.11028), kind=kOutput, calls=fused_computation.18608 + Allocation type: HLO temp + ========================== + 10. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20122 = fusion(get-tuple-element.11239, get-tuple-element.13247, fusion.1645, bitcast.11108), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 11. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20120 = fusion(get-tuple-element.11243, get-tuple-element.13263, fusion.1644, bitcast.11106), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 12. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20118 = fusion(get-tuple-element.11247, get-tuple-element.13439, fusion.1643, bitcast.11104), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 13. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20116 = fusion(get-tuple-element.11251, get-tuple-element.13615, fusion.1642, bitcast.11102), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 14. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20114 = fusion(get-tuple-element.11255, get-tuple-element.13791, fusion.1641, bitcast.11100), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 15. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20112 = fusion(get-tuple-element.11259, get-tuple-element.13935, fusion.1640, bitcast.11098), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 16. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20110 = fusion(get-tuple-element.11263, get-tuple-element.13951, fusion.1639, bitcast.11096), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 17. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20108 = fusion(get-tuple-element.11267, get-tuple-element.13967, fusion.1638, bitcast.11094), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 18. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20106 = fusion(get-tuple-element.11271, get-tuple-element.13983, fusion.1637, bitcast.11092), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 19. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20104 = fusion(get-tuple-element.11275, get-tuple-element.13999, fusion.1636, bitcast.11090), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== + 20. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20102 = fusion(get-tuple-element.11279, get-tuple-element.13279, fusion.1635, bitcast.11088), kind=kOutput, calls=fused_computation.18668 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt b/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 
+importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No 
newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json b/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fc26413d143e0a899730725a474903daca138a98 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T08:12:19.247538", + "startedAt": "2022-07-30T08:12:15.884916", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=24", + "--per_device_eval_batch_size=24", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": 
"https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json b/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..68aedcb3f4d7db9af7ba584547258d149c6f582c --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1000}} \ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log b/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8507fbbbf1042e54098fa61f87a455bfa1f723ed --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log @@ -0,0 +1,555 @@ +2022-07-30 08:12:16,797 INFO MainThread:1066697 [internal.py:wandb_internal():87] W&B internal server running at pid: 1066697, started at: 2022-07-30 08:12:16.796960 +2022-07-30 08:12:16,799 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 08:12:16,799 INFO WriterThread:1066697 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb +2022-07-30 08:12:16,800 DEBUG SenderThread:1066697 [sender.py:send():234] send: header +2022-07-30 08:12:16,800 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: check_version +2022-07-30 08:12:16,838 DEBUG SenderThread:1066697 [sender.py:send():234] send: run +2022-07-30 08:12:17,015 INFO SenderThread:1066697 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files +2022-07-30 08:12:17,015 INFO SenderThread:1066697 
[sender.py:_start_run_threads():804] run started: 1j14rrmn with start time 1659168735 +2022-07-30 08:12:17,015 DEBUG SenderThread:1066697 [sender.py:send():234] send: summary +2022-07-30 08:12:17,015 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:12:17,015 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 08:12:18,017 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:__init__():40] meta init +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:__init__():54] meta init done +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:probe():214] probe +2022-07-30 08:12:19,248 DEBUG HandlerThread:1066697 [meta.py:_setup_git():204] setup git +2022-07-30 08:12:19,287 DEBUG HandlerThread:1066697 [meta.py:_setup_git():211] setup git done +2022-07-30 08:12:19,287 DEBUG HandlerThread:1066697 [meta.py:_save_code():92] save code +2022-07-30 08:12:19,300 DEBUG HandlerThread:1066697 [meta.py:_save_code():113] save code done +2022-07-30 08:12:19,300 DEBUG HandlerThread:1066697 [meta.py:_save_patches():130] save patches +2022-07-30 08:12:19,374 DEBUG HandlerThread:1066697 [meta.py:_save_patches():172] save patches done +2022-07-30 08:12:19,374 DEBUG HandlerThread:1066697 [meta.py:_save_pip():58] save pip +2022-07-30 08:12:19,375 DEBUG HandlerThread:1066697 [meta.py:_save_pip():72] save pip done +2022-07-30 08:12:19,375 DEBUG HandlerThread:1066697 [meta.py:probe():252] probe done +2022-07-30 08:12:19,378 DEBUG SenderThread:1066697 [sender.py:send():234] send: files +2022-07-30 08:12:19,378 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 08:12:19,379 INFO SenderThread:1066697 [sender.py:_save_file():939] 
saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 08:12:19,379 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 08:12:19,385 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:19,386 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:19,853 INFO Thread-13 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/11b76ghg-diff.patch +2022-07-30 08:12:19,923 INFO Thread-11 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/ttzg9uht-wandb-metadata.json +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/diff.patch +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code +2022-07-30 08:12:20,058 INFO Thread-12 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/3u6u0iox-code/run_flax_speech_recognition_ctc.py 
+2022-07-30 08:12:22,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:24,023 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:26,024 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:28,025 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:34,029 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:34,522 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:34,523 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:36,030 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:47,324 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:12:48,036 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:49,682 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:49,683 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:50,037 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:01,042 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:03,044 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:04,886 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:04,886 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:17,051 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:17,400 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:13:19,052 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:20,032 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:20,033 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:35,188 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:35,188 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:47,472 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:13:50,379 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:50,380 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:01,072 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:04,073 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:05,655 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:05,655 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:06,074 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:08,075 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:10,076 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:12,077 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:14,078 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:16,080 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:17,538 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:14:18,081 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:20,082 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:20,979 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:20,980 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:22,083 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:24,084 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:26,085 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:28,086 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:30,090 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:32,091 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:34,092 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:36,093 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:36,163 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:36,164 DEBUG SenderThread:1066697 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:38,094 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:40,095 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:42,096 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:44,097 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:46,098 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:47,614 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:14:48,099 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:50,099 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:51,300 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:51,300 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:52,100 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:54,101 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:56,102 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:58,103 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:00,104 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:02,105 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:04,110 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:06,111 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:06,461 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:06,461 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:08,112 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:10,113 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:12,114 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:14,115 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:16,116 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:17,684 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:15:18,117 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:20,118 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:21,596 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:21,597 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:22,120 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:24,121 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:27,122 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:29,123 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:31,124 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:33,125 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:35,126 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:36,740 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:36,740 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:37,127 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:39,129 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:41,130 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:43,131 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:45,132 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:47,133 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:47,767 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:15:49,134 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:51,135 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:51,880 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:51,881 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:53,136 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:55,137 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:57,138 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:59,140 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:01,141 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:03,142 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:05,144 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:07,069 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:07,070 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:07,145 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:09,146 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:11,147 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:13,148 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:15,149 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:17,150 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:17,847 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:16:19,151 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:21,152 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:22,207 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:22,207 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:23,153 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:25,155 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:27,156 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:29,157 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:31,158 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:33,159 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:35,160 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:37,161 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:37,354 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:37,355 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:39,162 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:41,163 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:43,164 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:45,165 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:47,167 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:47,929 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:16:49,168 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:51,169 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:52,496 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:52,496 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:53,172 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:55,173 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:57,174 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:59,176 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:01,177 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:03,178 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:05,179 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:07,180 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:07,679 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:07,680 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:09,181 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:11,182 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:13,183 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:15,184 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:17,186 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:18,012 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:17:19,187 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:21,189 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:22,820 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:22,820 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:23,190 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:25,191 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:27,192 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:29,193 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:31,194 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:33,195 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:35,196 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:37,197 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:17:37,961 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:37,961 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:39,198 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:41,200 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:43,201 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:45,202 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:47,204 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:48,090 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:17:49,205 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:51,206 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:53,098 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:53,098 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:53,207 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:55,208 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:57,209 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:59,210 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:01,211 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:03,212 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:05,214 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:07,215 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:08,239 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:08,239 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:09,216 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:11,217 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:18:13,218 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:15,219 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:17,220 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:18,173 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:18:19,221 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:21,222 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:23,223 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:23,381 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:23,381 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:25,224 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:27,225 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:29,226 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:18:31,226 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:33,227 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:35,232 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:37,232 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:38,544 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:38,544 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:39,233 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:41,235 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:43,236 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:45,237 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:47,238 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:48,259 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 
08:18:49,240 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:51,241 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:53,243 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:53,684 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:53,684 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:55,244 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:58,245 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:00,247 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:02,248 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:04,249 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:06,250 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:08,251 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:08,830 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:08,831 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:10,252 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:12,253 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:14,254 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:16,255 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:18,256 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:18,345 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:19:20,259 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:22,260 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:23,968 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:23,969 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:24,262 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:26,263 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:28,264 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:30,265 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:32,266 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:39,127 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:39,127 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:48,419 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:19:54,261 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:54,261 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:09,418 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:09,418 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:10,283 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:12,284 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:14,285 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:16,286 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:18,287 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:18,491 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:20:20,288 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:22,289 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:24,290 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:24,561 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:24,562 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:26,291 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:28,293 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:30,294 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:32,295 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:34,296 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:36,297 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:38,298 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:39,702 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:39,702 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:40,299 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:48,302 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:48,565 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:20:50,304 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:52,305 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:54,306 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:54,850 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:54,850 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:56,307 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:09,990 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:09,991 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:18,635 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:21:25,124 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:25,125 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:35,324 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:37,325 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:39,326 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:40,258 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:40,258 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:41,327 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:21:43,328 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:45,330 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:47,331 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:48,706 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:21:49,332 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:51,333 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:53,339 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:55,340 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:55,396 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:55,396 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:57,341 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:59,341 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:22:01,343 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:03,344 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:05,345 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:09,347 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:10,550 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:10,551 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:11,348 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:13,349 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:15,350 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:17,351 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:18,781 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:22:19,352 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:22:21,353 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:23,355 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:25,356 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:25,921 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:25,922 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:27,357 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:41,062 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:41,063 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:48,862 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:22:56,209 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:56,209 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:57,372 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:11,349 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:11,350 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:18,936 DEBUG SenderThread:1066697 [sender.py:send():234] send: 
stats +2022-07-30 08:23:26,514 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:26,514 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:41,392 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:42,030 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:42,030 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:49,006 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:23:50,397 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:57,371 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:57,372 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:58,401 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:07,405 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:12,623 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:12,623 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:15,409 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:19,076 DEBUG SenderThread:1066697 [sender.py:send():234] 
send: stats +2022-07-30 08:24:23,413 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:25,414 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:27,844 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:27,844 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:31,417 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:33,418 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:35,419 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:42,423 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:43,414 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:43,415 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:49,142 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:24:58,843 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:58,844 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:13,994 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 08:25:13,994 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:19,203 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:25:23,441 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:29,148 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:29,148 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:31,445 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:33,446 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:44,309 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:44,309 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:49,266 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:25:59,453 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:59,454 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:14,589 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:14,590 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:19,331 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:26:29,723 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:29,724 DEBUG 
SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:44,856 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:44,857 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:49,394 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:26:59,989 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:59,990 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:15,123 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:15,123 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:19,460 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:27:30,257 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:30,258 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:45,403 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:45,403 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:49,529 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:00,538 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:00,539 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:15,669 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:15,670 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:19,599 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats 
+2022-07-30 08:28:30,810 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:30,810 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:45,949 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:45,949 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:49,674 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:56,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:57,453 DEBUG SenderThread:1066697 [sender.py:send():234] send: telemetry +2022-07-30 08:28:57,453 DEBUG SenderThread:1066697 [sender.py:send():234] send: exit +2022-07-30 08:28:57,453 INFO SenderThread:1066697 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:28:57,455 INFO SenderThread:1066697 [sender.py:send_exit():368] handling runtime: 1000 +2022-07-30 08:28:57,455 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:57,456 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:28:57,457 INFO SenderThread:1066697 [sender.py:send_exit():374] send defer +2022-07-30 08:28:57,457 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:57,458 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,458 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:28:57,458 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,458 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:28:57,458 INFO 
SenderThread:1066697 [sender.py:transition_state():387] send defer: 1 +2022-07-30 08:28:57,459 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,459 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:28:57,494 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,494 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:28:57,494 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:28:57,494 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,494 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:28:57,495 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:57,495 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,495 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:28:57,495 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:28:57,495 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,496 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:28:57,496 DEBUG SenderThread:1066697 [sender.py:send():234] send: summary +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:28:57,496 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:28:57,497 DEBUG 
HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,497 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:28:57,497 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,497 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:28:57,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:57,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:28:57,560 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:57,657 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:28:57,657 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:57,658 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,658 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:28:57,658 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,658 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:28:57,658 INFO SenderThread:1066697 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:28:57,759 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml +2022-07-30 08:28:58,540 INFO SenderThread:1066697 
[dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files +2022-07-30 08:28:58,540 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml config.yaml +2022-07-30 08:28:58,541 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/diff.patch diff.patch +2022-07-30 08:28:58,541 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt requirements.txt +2022-07-30 08:28:58,542 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log output.log +2022-07-30 08:28:58,545 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json wandb-summary.json +2022-07-30 08:28:58,545 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:28:58,548 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:28:58,548 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:28:58,548 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,555 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:58,555 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:28:58,556 DEBUG SenderThread:1066697 [sender.py:send_request():248] 
send_request: defer +2022-07-30 08:28:58,556 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:28:58,556 INFO SenderThread:1066697 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:28:58,656 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,656 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,758 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,758 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,859 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,859 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,961 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,961 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,048 INFO Thread-15 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt +2022-07-30 08:28:59,063 INFO Thread-17 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:28:59,063 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,064 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,068 INFO Thread-14 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml +2022-07-30 08:28:59,165 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,165 
DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,223 INFO Thread-16 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:59,267 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,267 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,368 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,369 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,424 INFO Thread-7 :1066697 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:28:59,425 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,425 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:28:59,425 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,425 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:28:59,470 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,882 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:28:59,882 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,883 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,883 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:28:59,883 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,883 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:28:59,884 INFO 
SenderThread:1066697 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:28:59,884 DEBUG SenderThread:1066697 [sender.py:send():234] send: final +2022-07-30 08:28:59,884 DEBUG SenderThread:1066697 [sender.py:send():234] send: footer +2022-07-30 08:28:59,885 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,885 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:28:59,885 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,885 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:28:59,984 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,984 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,984 INFO SenderThread:1066697 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:29:00,256 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:29:00,256 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:29:00,257 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 08:29:00,257 INFO HandlerThread:1066697 [handler.py:finish():731] shutting down handler +2022-07-30 08:29:00,885 INFO WriterThread:1066697 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [sender.py:finish():1070] shutting down sender +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:29:01,259 INFO MainThread:1066697 [internal.py:handle_exit():77] 
Internal process exited diff --git a/wandb/run-20220730_081215-1j14rrmn/logs/debug.log b/wandb/run-20220730_081215-1j14rrmn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..eb86d60db11a05c06ce19b102739a32148297917 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/logs/debug.log +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():404] calling init triggers +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():460] starting backend +2022-07-30 08:12:15,887 INFO MainThread:1065426 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 08:12:15,934 INFO MainThread:1065426 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-30 08:12:15,978 INFO MainThread:1065426 [backend.py:ensure_launched():221] started backend process with pid: 1066697 +2022-07-30 08:12:15,980 INFO MainThread:1065426 [wandb_init.py:init():469] backend started and connected +2022-07-30 08:12:15,994 INFO MainThread:1065426 [wandb_init.py:init():533] updated telemetry +2022-07-30 08:12:16,107 INFO MainThread:1065426 [wandb_init.py:init():563] communicating current version +2022-07-30 08:12:16,837 INFO MainThread:1065426 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 08:12:16,837 INFO MainThread:1065426 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 08:12:17,015 INFO MainThread:1065426 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 08:12:19,382 INFO MainThread:1065426 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 08:12:19,382 INFO MainThread:1065426 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 08:12:19,383 INFO MainThread:1065426 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 08:12:19,385 INFO MainThread:1065426 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 08:12:19,385 INFO MainThread:1065426 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:28:55,000 INFO MainThread:1065426 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:28:55,006 INFO MainThread:1065426 [wandb_run.py:_restore():1752] restore +2022-07-30 08:28:57,458 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:28:57,658 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:28:58,555 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 380625 +} + +2022-07-30 08:28:58,657 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 380625 +} + +2022-07-30 08:28:58,758 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:58,860 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:58,962 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,064 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,166 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,268 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,369 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,883 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:29:00,255 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} +local_info { +} + +2022-07-30 08:29:01,839 INFO MainThread:1065426 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb b/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6d438db798a75fadee9d1caa3331331d701b9f43 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d31fe4374129f405b586f7d4ac768ff254bb9407ef86c2ca61191d001f87c7 +size 420905 diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml b/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..303548b1b8391310517c1c46afcfd54ce1d94227 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659170305 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch b/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/output.log b/wandb/run-20220730_083825-1jwtqtqg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f5393e7b38aee757804c8230b435f52beae8e578 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/output.log @@ -0,0 +1,1596 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_08-38-21_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=16, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.80it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 442.23it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_q', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 6%|███████████ | 534/9523 [00:00<00:01, 5336.47ex/s] +removing punctuation from train split #1: 9%|█████████████████▌ | 842/9523 [00:00<00:01, 8413.07ex/s] +removing punctuation from train split #2: 9%|█████████████████▍ | 838/9523 [00:00<00:01, 8373.56ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8069.44ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8314.43ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8520.45ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8426.38ex/s] 
+removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8000.68ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8342.10ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8513.85ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8257.75ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8343.57ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8202.27ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8288.07ex/s] +removing 
punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8171.57ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8342.00ex/s] +removing punctuation from train split #17: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8211.27ex/s] +removing punctuation from train split #9: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9447/9523 [00:01<00:00, 8221.07ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7696/9522 [00:00<00:00, 7880.94ex/s] +removing punctuation from train split #10: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9369/9523 [00:01<00:00, 8204.75ex/s] +removing punctuation from train split #11: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8610/9523 [00:01<00:00, 8530.61ex/s] +removing punctuation from train split #12: 
90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8570/9522 [00:01<00:00, 8127.58ex/s] +removing punctuation from train split #13: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8390/9522 [00:01<00:00, 8024.73ex/s] +removing punctuation from train split #12: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9438/9522 [00:01<00:00, 8288.37ex/s] +removing punctuation from train split #15: 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7524/9522 [00:00<00:00, 7888.33ex/s] +removing punctuation from train split #13: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9247/9522 [00:01<00:00, 8184.43ex/s] +removing punctuation from train split #17: 70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 6654/9522 [00:00<00:00, 8547.53ex/s] +removing punctuation from train split #14: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9339/9522 [00:01<00:00, 8295.07ex/s] +removing punctuation from train split #19: 52%|█████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 4949/9522 [00:00<00:00, 8423.73ex/s] +removing 
punctuation from train split #15: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9202/9522 [00:01<00:00, 8102.80ex/s] +removing punctuation from train split #18: 55%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5198/9522 [00:00<00:00, 7093.89ex/s] +removing punctuation from train split #16: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9406/9522 [00:01<00:00, 8346.54ex/s] +removing punctuation from train split #18: 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6053/9522 [00:00<00:00, 6973.73ex/s] +removing punctuation from train split #17: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9235/9522 [00:01<00:00, 8258.18ex/s] +removing punctuation from train split #18: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6918/9522 [00:00<00:00, 7444.23ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7795/9522 [00:01<00:00, 7823.84ex/s] +removing punctuation from train split #20: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7592/9522 [00:00<00:00, 7943.57ex/s] +removing punctuation from train split 
#21: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6688/9522 [00:00<00:00, 8583.36ex/s] +removing punctuation from train split #29: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:50:45.177810: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:50:45.177866: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 16 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 128 +INFO:__main__: Total optimization steps = 94560 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File 
"/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 17.42G of 15.48G hbm. Exceeded hbm capacity by 1.94G. +Total hbm usage >= 17.94G: + reserved 530.00M + program 6.62G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 6.62G: + global 180.0K + scoped 72.08M + HLO temp 6.55G (99.2% utilization: Unpadded (5.76G) Padded (5.80G), 11.4% fragmentation (766.16M)) + Largest program allocations in hbm: + 1. Size: 1000.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[16,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 974.61M + Extra memory due to padding: 25.39M (1.0x expansion) + XLA label: fusion.181.remat6 = fusion(bitcast.7446, bitcast.7444, fusion.14564), kind=kOutput, calls=fused_computation.177.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,5120]{2,0,1:T(8,128)} + Unpadded size: 312.19M + XLA label: fusion.1572.remat = fusion(fusion.6366, get-tuple-element.20837, bitcast.11078), kind=kOutput, calls=fused_computation.1412.clone + Allocation type: HLO temp + ========================== + 3. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[16,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 156.09M + XLA label: fusion.24057 = fusion(fusion.1572.remat, get-tuple-element.20812, get-tuple-element.20811, get-tuple-element.20846, ...(+1)), kind=kOutput, calls=fused_computation.18874 + Allocation type: HLO temp + ========================== + 4. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/36/remat(core_fn)/36/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.7158 = fusion(get-tuple-element.9865, get-tuple-element.20385, get-tuple-element.10106, get-tuple-element.20384, ...(+3)), kind=kLoop, calls=fused_computation.6602 + Allocation type: HLO temp + ========================== + 5. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/16/16/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20090 = fusion(get-tuple-element.11264, get-tuple-element.13336, fusion.1629, bitcast.10986), kind=kOutput, calls=fused_computation.18656 + Allocation type: HLO temp + ========================== + 6. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13656, fusion.1611, bitcast.10950), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 7. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20056 = fusion(fusion.6766.remat, get-tuple-element.13640, fusion.1612, bitcast.10952), kind=kOutput, calls=fused_computation.18622 + Allocation type: HLO temp + ========================== + 8. 
Size: 78.05M + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: copy.17574 = copy(bitcast.14536) + Allocation type: HLO temp + ========================== + 9. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.13966, fusion.8945, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 10. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.6660.remat2 = fusion(get-tuple-element.20844, copy.15230.remat2, get-tuple-element.20831, bitcast.11077), kind=kOutput, calls=fused_computation.6104.clone.clone + Allocation type: HLO temp + ========================== + 11. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20122 = fusion(get-tuple-element.11200, get-tuple-element.13208, fusion.1645, bitcast.11018), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 12. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20120 = fusion(get-tuple-element.11204, get-tuple-element.13224, fusion.1644, bitcast.11016), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 13. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20118 = fusion(get-tuple-element.11208, get-tuple-element.13400, fusion.1643, bitcast.11014), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 14. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20116 = fusion(get-tuple-element.11212, get-tuple-element.13576, fusion.1642, bitcast.11012), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 15. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20114 = fusion(get-tuple-element.11216, get-tuple-element.13752, fusion.1641, bitcast.11010), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 16. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20112 = fusion(get-tuple-element.11220, get-tuple-element.13896, fusion.1640, bitcast.11008), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 17. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20110 = fusion(get-tuple-element.11224, get-tuple-element.13912, fusion.1639, bitcast.11006), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 18. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20108 = fusion(get-tuple-element.11228, get-tuple-element.13928, fusion.1638, bitcast.11004), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 19. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20106 = fusion(get-tuple-element.11232, get-tuple-element.13944, fusion.1637, bitcast.11002), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 20. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20104 = fusion(get-tuple-element.11236, get-tuple-element.13960, fusion.1636, bitcast.11000), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 17.42G of 15.48G hbm. Exceeded hbm capacity by 1.94G. +Total hbm usage >= 17.94G: + reserved 530.00M + program 6.62G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 6.62G: + global 180.0K + scoped 72.08M + HLO temp 6.55G (99.2% utilization: Unpadded (5.76G) Padded (5.80G), 11.4% fragmentation (766.16M)) + Largest program allocations in hbm: + 1. 
Size: 1000.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[16,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 974.61M + Extra memory due to padding: 25.39M (1.0x expansion) + XLA label: fusion.181.remat6 = fusion(bitcast.7446, bitcast.7444, fusion.14564), kind=kOutput, calls=fused_computation.177.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,5120]{2,0,1:T(8,128)} + Unpadded size: 312.19M + XLA label: fusion.1572.remat = fusion(fusion.6366, get-tuple-element.20837, bitcast.11078), kind=kOutput, calls=fused_computation.1412.clone + Allocation type: HLO temp + ========================== + 3. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[16,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 156.09M + XLA label: fusion.24057 = fusion(fusion.1572.remat, get-tuple-element.20812, get-tuple-element.20811, get-tuple-element.20846, ...(+1)), kind=kOutput, calls=fused_computation.18874 + Allocation type: HLO temp + ========================== + 4. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/36/remat(core_fn)/36/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.7158 = fusion(get-tuple-element.9865, get-tuple-element.20385, get-tuple-element.10106, get-tuple-element.20384, ...(+3)), kind=kLoop, calls=fused_computation.6602 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/16/16/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20090 = fusion(get-tuple-element.11264, get-tuple-element.13336, fusion.1629, bitcast.10986), kind=kOutput, calls=fused_computation.18656 + Allocation type: HLO temp + ========================== + 6. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13656, fusion.1611, bitcast.10950), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 7. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20056 = fusion(fusion.6766.remat, get-tuple-element.13640, fusion.1612, bitcast.10952), kind=kOutput, calls=fused_computation.18622 + Allocation type: HLO temp + ========================== + 8. Size: 78.05M + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: copy.17574 = copy(bitcast.14536) + Allocation type: HLO temp + ========================== + 9. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.13966, fusion.8945, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 10. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.6660.remat2 = fusion(get-tuple-element.20844, copy.15230.remat2, get-tuple-element.20831, bitcast.11077), kind=kOutput, calls=fused_computation.6104.clone.clone + Allocation type: HLO temp + ========================== + 11. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20122 = fusion(get-tuple-element.11200, get-tuple-element.13208, fusion.1645, bitcast.11018), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 12. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20120 = fusion(get-tuple-element.11204, get-tuple-element.13224, fusion.1644, bitcast.11016), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 13. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20118 = fusion(get-tuple-element.11208, get-tuple-element.13400, fusion.1643, bitcast.11014), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 14. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20116 = fusion(get-tuple-element.11212, get-tuple-element.13576, fusion.1642, bitcast.11012), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 15. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20114 = fusion(get-tuple-element.11216, get-tuple-element.13752, fusion.1641, bitcast.11010), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 16. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20112 = fusion(get-tuple-element.11220, get-tuple-element.13896, fusion.1640, bitcast.11008), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 17. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20110 = fusion(get-tuple-element.11224, get-tuple-element.13912, fusion.1639, bitcast.11006), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 18. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20108 = fusion(get-tuple-element.11228, get-tuple-element.13928, fusion.1638, bitcast.11004), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 19. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20106 = fusion(get-tuple-element.11232, get-tuple-element.13944, fusion.1637, bitcast.11002), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 20. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20104 = fusion(get-tuple-element.11236, get-tuple-element.13960, fusion.1636, bitcast.11000), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt b/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 
+contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 
+tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8a9168dc9d1d93de8b629db4bd1ae50782a9539e --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T08:38:29.242261", + "startedAt": "2022-07-30T08:38:25.752508", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a7ef6c1b2317e4698b2d4108e7c29c613ad653db --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1012}} \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log b/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..438502a84c016369287cabda9173a7fc3e0fceb3 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log @@ -0,0 +1,556 @@ +2022-07-30 08:38:26,680 INFO MainThread:87547 [internal.py:wandb_internal():87] W&B internal server running at pid: 87547, started at: 2022-07-30 08:38:26.680310 +2022-07-30 08:38:26,682 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 08:38:26,683 DEBUG 
SenderThread:87547 [sender.py:send():234] send: header +2022-07-30 08:38:26,683 INFO WriterThread:87547 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb +2022-07-30 08:38:26,683 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: check_version +2022-07-30 08:38:26,721 DEBUG SenderThread:87547 [sender.py:send():234] send: run +2022-07-30 08:38:26,926 INFO SenderThread:87547 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files +2022-07-30 08:38:26,926 INFO SenderThread:87547 [sender.py:_start_run_threads():804] run started: 1jwtqtqg with start time 1659170305 +2022-07-30 08:38:26,926 DEBUG SenderThread:87547 [sender.py:send():234] send: summary +2022-07-30 08:38:26,927 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:38:26,928 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 08:38:27,928 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:38:29,241 DEBUG HandlerThread:87547 [meta.py:__init__():40] meta init +2022-07-30 08:38:29,242 DEBUG HandlerThread:87547 [meta.py:__init__():54] meta init done +2022-07-30 08:38:29,242 DEBUG HandlerThread:87547 [meta.py:probe():214] probe +2022-07-30 08:38:29,244 DEBUG HandlerThread:87547 [meta.py:_setup_git():204] setup git +2022-07-30 08:38:29,289 DEBUG HandlerThread:87547 [meta.py:_setup_git():211] setup git done +2022-07-30 08:38:29,289 DEBUG HandlerThread:87547 [meta.py:_save_code():92] save code +2022-07-30 08:38:29,303 DEBUG HandlerThread:87547 [meta.py:_save_code():113] save code done +2022-07-30 08:38:29,303 DEBUG HandlerThread:87547 [meta.py:_save_patches():130] save patches +2022-07-30 08:38:29,384 DEBUG HandlerThread:87547 
[meta.py:_save_patches():172] save patches done +2022-07-30 08:38:29,384 DEBUG HandlerThread:87547 [meta.py:_save_pip():58] save pip +2022-07-30 08:38:29,385 DEBUG HandlerThread:87547 [meta.py:_save_pip():72] save pip done +2022-07-30 08:38:29,385 DEBUG HandlerThread:87547 [meta.py:probe():252] probe done +2022-07-30 08:38:29,388 DEBUG SenderThread:87547 [sender.py:send():234] send: files +2022-07-30 08:38:29,389 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 08:38:29,389 INFO SenderThread:87547 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 08:38:29,390 INFO SenderThread:87547 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 08:38:29,395 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:29,396 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:29,884 INFO Thread-11 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/331ec8hl-wandb-metadata.json +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] 
file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code +2022-07-30 08:38:29,935 INFO Thread-13 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/1tu8yniu-diff.patch +2022-07-30 08:38:30,103 INFO Thread-12 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/69l0n8jf-code/run_flax_speech_recognition_ctc.py +2022-07-30 08:38:31,932 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:33,932 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:35,934 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:37,935 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:43,938 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:44,561 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:44,562 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:45,939 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:57,332 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 
08:38:57,943 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:59,702 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:59,703 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:59,944 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:01,945 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:10,949 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:12,950 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:14,860 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:14,860 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:14,951 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:27,405 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:39:28,957 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:30,001 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:30,001 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:45,152 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:45,153 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:57,471 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:00,306 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:00,307 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:12,975 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:14,976 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:15,474 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:15,475 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:16,977 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:18,977 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:20,978 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:22,979 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:24,980 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:26,981 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:27,540 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:28,982 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:30,655 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:30,655 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:30,983 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:32,984 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:34,985 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:36,986 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:38,987 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:40,988 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:42,990 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:44,992 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:46,024 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:46,024 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:46,993 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:48,994 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:50,995 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:52,996 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:54,997 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:56,998 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:57,607 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:58,999 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:01,000 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:01,159 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:01,159 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:03,001 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:05,003 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:07,004 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:09,005 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:11,006 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:13,007 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:15,008 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:16,308 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:16,308 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:17,009 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:41:19,010 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:21,011 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:23,012 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:25,013 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:27,014 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:27,686 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:41:29,018 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:31,019 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:31,471 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:31,471 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:33,020 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:35,021 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:37,022 INFO 
Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:39,023 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:41,024 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:43,025 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:45,026 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:46,608 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:46,608 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:47,028 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:49,028 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:51,029 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:53,030 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:55,031 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:57,036 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:57,762 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:41:59,037 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:01,038 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:01,751 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:01,752 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:03,039 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:05,040 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:07,041 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:09,042 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:11,043 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:13,044 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:15,046 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:16,897 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:16,898 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:17,046 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:19,047 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:21,048 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:23,049 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:25,050 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:27,051 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:27,839 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:42:29,052 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:32,043 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
08:42:32,043 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:32,054 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:34,055 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:36,056 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:38,058 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:40,058 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:42,060 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:44,061 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:46,062 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:47,185 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:47,185 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:48,063 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 
08:42:50,064 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:52,065 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:54,066 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:56,067 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:57,924 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:42:58,068 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:00,069 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:02,070 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:02,347 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:02,348 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:04,071 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:06,072 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:08,073 INFO Thread-8 
:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:10,074 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:12,076 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:14,077 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:16,078 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:17,491 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:17,491 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:18,080 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:20,081 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:22,082 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:24,083 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:26,085 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:28,002 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:43:28,086 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:30,088 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:32,088 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:32,633 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:32,633 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:34,089 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:36,090 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:38,091 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:40,092 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:42,093 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:44,094 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:46,095 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:47,771 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:47,772 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:48,097 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:50,098 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:52,099 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:54,100 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:56,101 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:58,085 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:43:58,102 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:00,103 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:02,104 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:02,924 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:02,925 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:04,106 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:06,107 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:08,108 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:10,109 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:12,110 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:14,111 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:16,113 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:18,074 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:18,075 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:18,114 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:44:20,115 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:22,116 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:24,117 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:26,118 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:28,119 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:28,166 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:44:30,120 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:32,121 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:33,222 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:33,222 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:34,122 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:36,123 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:38,124 INFO 
Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:40,125 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:42,126 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:44,127 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:46,128 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:48,129 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:48,359 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:48,359 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:50,130 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:52,131 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:54,133 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:56,135 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:58,137 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:58,256 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:45:00,138 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:02,139 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:03,530 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:03,530 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:04,140 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:06,141 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:08,142 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:11,143 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:13,144 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:15,145 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:17,146 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:18,671 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:18,672 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:19,147 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:21,148 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:23,149 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:25,151 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:27,152 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:28,334 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:45:29,153 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:31,155 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:33,156 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:33,836 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:33,836 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:35,157 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:37,158 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:39,159 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:41,160 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:43,161 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:45,162 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:47,162 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:48,981 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:48,981 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:49,163 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:45:58,416 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:04,118 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:04,119 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:19,477 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:19,477 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:25,181 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:27,182 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:28,496 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:29,183 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:31,184 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:33,185 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:34,735 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:34,735 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:35,187 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:37,188 INFO Thread-8 :87547 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:39,189 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:41,190 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:43,191 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:45,192 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:47,193 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:49,194 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:49,900 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:49,900 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:51,195 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:53,196 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:55,197 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:58,577 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:59,199 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:01,200 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:05,046 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:05,046 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:05,202 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:07,203 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:09,204 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:11,205 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:13,206 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:20,187 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:20,187 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:28,648 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 
08:47:35,320 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:35,320 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:43,220 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:50,672 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:50,672 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:54,225 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:56,226 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:58,227 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:58,722 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:48:00,228 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:02,229 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:04,230 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:05,900 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:05,900 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:06,231 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:08,232 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:10,233 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:12,234 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:14,235 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:16,236 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:18,237 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:20,238 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:21,039 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:21,039 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:22,240 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:26,242 INFO Thread-8 :87547 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:28,243 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:28,802 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:48:30,244 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:36,185 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:36,185 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:51,341 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:51,341 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:58,878 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:49:06,476 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:06,477 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:14,269 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:49:21,615 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:21,616 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:28,952 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:49:36,750 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:36,750 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:51,888 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:51,888 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:59,026 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:50:05,291 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:07,073 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:07,073 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:13,295 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:22,299 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:22,333 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:22,333 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:29,103 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:50:30,303 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:36,306 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:37,755 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:37,755 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:45,311 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:47,312 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:51,314 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:52,966 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:52,966 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:55,316 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:57,317 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:59,182 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:51:03,320 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:08,255 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:08,255 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:23,531 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:23,531 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:29,261 DEBUG SenderThread:87547 [sender.py:send():234] send: stats 
+2022-07-30 08:51:37,334 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:38,693 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:38,694 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:45,337 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:47,338 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:53,869 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:53,870 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:59,339 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:52:09,018 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:09,018 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:24,153 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:24,153 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:29,417 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:52:39,288 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:39,288 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:54,426 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:54,426 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:59,496 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:53:09,558 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:09,559 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:24,692 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:24,693 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:29,575 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:53:39,829 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:39,829 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:54,966 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:54,967 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:59,650 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:54:10,100 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:10,100 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:25,249 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:25,250 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:29,728 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:54:40,383 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:40,384 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:55,518 DEBUG HandlerThread:87547 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:55,518 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:59,952 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:55:10,659 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:55:10,659 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:55:19,446 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:19,859 DEBUG SenderThread:87547 [sender.py:send():234] send: telemetry +2022-07-30 08:55:19,859 DEBUG SenderThread:87547 [sender.py:send():234] send: exit +2022-07-30 08:55:19,859 INFO SenderThread:87547 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:55:19,861 INFO SenderThread:87547 [sender.py:send_exit():368] handling runtime: 1012 +2022-07-30 08:55:19,861 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:19,861 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:55:19,862 INFO SenderThread:87547 [sender.py:send_exit():374] send defer +2022-07-30 08:55:19,862 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:19,863 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,863 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:55:19,863 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,863 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:55:19,863 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 1 +2022-07-30 
08:55:19,864 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,864 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:55:19,905 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,905 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:55:19,905 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:55:19,905 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,905 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:55:19,905 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:55:19,906 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,906 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:55:19,906 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:55:19,906 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,906 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:55:19,907 DEBUG SenderThread:87547 [sender.py:send():234] send: summary +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:55:19,907 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:55:19,908 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,908 INFO 
HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:55:19,908 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,908 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:55:19,965 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,076 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:55:20,076 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,077 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:20,077 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:55:20,077 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:20,077 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:55:20,078 INFO SenderThread:87547 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:55:20,178 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,446 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files +2022-07-30 08:55:20,447 INFO 
SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml config.yaml +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch diff.patch +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt requirements.txt +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log output.log +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json wandb-summary.json +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:55:20,451 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:55:20,451 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:55:20,451 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,460 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:20,461 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:55:20,461 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:20,461 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:55:20,461 INFO 
SenderThread:87547 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:55:20,555 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,556 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,657 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,657 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,758 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,758 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,860 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,860 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,915 INFO Thread-15 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt +2022-07-30 08:55:20,950 INFO Thread-17 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:55:20,957 INFO Thread-14 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml +2022-07-30 08:55:20,962 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,962 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,063 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,063 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,121 INFO Thread-16 :87547 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:21,165 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,165 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,266 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,266 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,322 INFO Thread-7 :87547 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:55:21,322 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,322 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:55:21,323 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,323 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:55:21,367 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,810 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:55:21,810 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,811 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,811 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:55:21,812 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,812 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:55:21,812 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:55:21,812 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,812 
INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send():234] send: final +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send():234] send: footer +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,813 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:55:21,912 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,912 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,913 INFO SenderThread:87547 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:55:22,166 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:55:22,167 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:55:22,168 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 08:55:22,168 INFO HandlerThread:87547 [handler.py:finish():731] shutting down handler +2022-07-30 08:55:22,813 INFO WriterThread:87547 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb +2022-07-30 08:55:23,165 INFO SenderThread:87547 [sender.py:finish():1070] shutting down sender +2022-07-30 08:55:23,166 INFO SenderThread:87547 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:55:23,166 INFO SenderThread:87547 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:55:23,169 INFO MainThread:87547 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log b/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log new file mode 100644 index 
0000000000000000000000000000000000000000..ce1416381bfb6880eda6908eca34a96692623a71 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():404] calling init triggers +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():460] starting backend +2022-07-30 08:38:25,754 INFO MainThread:86199 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 08:38:25,812 INFO MainThread:86199 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 08:38:25,858 INFO MainThread:86199 [backend.py:ensure_launched():221] started backend process with pid: 87547 +2022-07-30 08:38:25,860 INFO MainThread:86199 [wandb_init.py:init():469] backend started and connected +2022-07-30 08:38:25,875 INFO MainThread:86199 [wandb_init.py:init():533] updated telemetry +2022-07-30 08:38:25,991 INFO MainThread:86199 [wandb_init.py:init():563] communicating current version +2022-07-30 08:38:26,719 INFO MainThread:86199 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 08:38:26,720 INFO MainThread:86199 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 08:38:26,927 INFO MainThread:86199 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 08:38:29,392 INFO MainThread:86199 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 08:38:29,393 INFO MainThread:86199 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 08:38:29,393 INFO MainThread:86199 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 08:38:29,395 INFO MainThread:86199 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 08:38:29,395 INFO MainThread:86199 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:55:17,539 INFO MainThread:86199 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:55:17,545 INFO MainThread:86199 [wandb_run.py:_restore():1752] restore +2022-07-30 08:55:19,863 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:55:20,077 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:55:20,454 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 402992 +} + +2022-07-30 08:55:20,556 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 402992 +} + +2022-07-30 08:55:20,657 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + 
wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,759 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,861 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,962 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,064 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,165 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,267 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,811 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:22,166 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} +local_info { +} + +2022-07-30 08:55:23,802 INFO MainThread:86199 [wandb_run.py:_append_files():2180] logging synced 
files diff --git a/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb b/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5d38a5e7bc2416b6cb3dbeb0d80a9200b273f9a7 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f517ba230a4b8d99820d7afad49dde911ae46c3cce8c7015ee6ce592b73a6777 +size 443514 diff --git a/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_093953-16dexcvn/files/config.yaml b/wandb/run-20220730_093953-16dexcvn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98b5896181e96443fa663c1aa8366b535092204d --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659173993 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_093953-16dexcvn/files/diff.patch b/wandb/run-20220730_093953-16dexcvn/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/output.log b/wandb/run-20220730_093953-16dexcvn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d8c67e7338fafb06ba08ce41c8407f7102009772 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/output.log @@ -0,0 +1,1614 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_09-39-49_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.32it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 469.02it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('project_q', 'bias'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9005.53ex/s] +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8305.21ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8893.39ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8962.96ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8046.06ex/s] +removing punctuation 
from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8535.17ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8824.27ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8296.15ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8468.31ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8493.52ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8583.75ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8299.35ex/s] +removing punctuation from train 
split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 6715.33ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8398.49ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8081.26ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8371.54ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8235.09ex/s] +removing punctuation from train split #7: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8361/9523 [00:01<00:00, 6123.53ex/s] +removing punctuation from train split #7: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9215/9523 [00:01<00:00, 6776.16ex/s] +removing punctuation from train split #12: 
80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7654/9522 [00:00<00:00, 7914.30ex/s] +removing punctuation from train split #11: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8772/9523 [00:01<00:00, 8762.24ex/s] +removing punctuation from train split #14: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6836/9522 [00:00<00:00, 8633.05ex/s] +removing punctuation from train split #12: 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8506/9522 [00:01<00:00, 8091.22ex/s] +removing punctuation from train split #12: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9346/9522 [00:01<00:00, 8180.56ex/s] +removing punctuation from train split #13: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8282/9522 [00:01<00:00, 7900.65ex/s] +removing punctuation from train split #14: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8579/9522 [00:01<00:00, 8236.27ex/s] +removing punctuation from train split #13: 
96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9110/9522 [00:01<00:00, 8012.09ex/s] +removing punctuation from train split #14: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9456/9522 [00:01<00:00, 8390.83ex/s] +removing punctuation from train split #17: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6679/9522 [00:00<00:00, 8541.97ex/s] +removing punctuation from train split #16: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7538/9522 [00:00<00:00, 7832.63ex/s] +removing punctuation from train split #15: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9431/9522 [00:01<00:00, 8358.51ex/s] +removing punctuation from train split #16: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8394/9522 [00:01<00:00, 8042.70ex/s] +removing punctuation from train split #16: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9270/9522 [00:01<00:00, 8252.38ex/s] +removing punctuation from train split #17: 
88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8404/9522 [00:01<00:00, 8140.35ex/s] +removing punctuation from train split #18: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8586/9522 [00:01<00:00, 8490.22ex/s] +removing punctuation from train split #19: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7629/9522 [00:00<00:00, 8297.75ex/s] +removing punctuation from train split #19: 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8520/9522 [00:01<00:00, 8479.08ex/s] +removing punctuation from train split #21: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6656/9522 [00:00<00:00, 8263.56ex/s] +removing punctuation from train split #22: 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6766/9522 [00:00<00:00, 8573.39ex/s] +removing punctuation from train split #23: 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 5994/9522 [00:00<00:00, 8650.52ex/s] +removing punctuation from train split #23: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6880/9522 [00:00<00:00, 8715.22ex/s] +removing punctuation from train split #25: 
54%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5110/9522 [00:00<00:00, 7778.84ex/s] +removing punctuation from train split #26: 53%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5086/9522 [00:00<00:00, 8627.94ex/s] +removing punctuation from train split #27: 44%|███████████████████████████████████████████████████████████████████████████████████████▏ | 4234/9522 [00:00<00:00, 8573.14ex/s] +removing punctuation from train split #28: 26%|██████████████████████████████████████████████████▊ | 2469/9522 [00:00<00:00, 8323.04ex/s] +removing punctuation from train split #29: 26%|███████████████████████████████████████████████████▍ | 2499/9522 [00:00<00:00, 8407.52ex/s] +removing punctuation from train split #30: 26%|██████████████████████████████████████████████████▍ | 2450/9522 [00:00<00:00, 8264.69ex/s] +removing punctuation from train split #25: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7791/9522 [00:00<00:00, 8535.43ex/s] +removing punctuation from train split #24: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 9496/9522 [00:01<00:00, 8276.56ex/s] +removing punctuation from train split #25: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8727/9522 [00:01<00:00, 8779.85ex/s] +removing punctuation from train split #26: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8659/9522 [00:01<00:00, 
8862.78ex/s] +removing punctuation from train split #27: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7855/9522 [00:00<00:00, 9026.60ex/s] +removing punctuation from train split #28: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7169/9522 [00:00<00:00, 9404.61ex/s] +removing punctuation from train split #29: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7024/9522 [00:00<00:00, 9096.91ex/s] +removing punctuation from train split #27: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8797/9522 [00:01<00:00, 9144.61ex/s] +removing punctuation from train split #28: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8145/9522 [00:00<00:00, 9514.75ex/s] +removing punctuation from train split #28: 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9119/9522 [00:01<00:00, 9582.49ex/s] +removing punctuation from train split #29: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8941/9522 [00:01<00:00, 9354.41ex/s] +removing punctuation from train split #30: 
84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8017/9522 [00:00<00:00, 9333.88ex/s] +removing punctuation from train split #30: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8971/9522 [00:01<00:00, 9395.12ex/s] +removing punctuation from train split #31: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8154/9522 [00:00<00:00, 9528.76ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00022_of_00032.arrow9121/9522 [00:01<00:00, 9572.00ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 1%|█▉ | 85/9497 [00:02<02:36, 60.05ex/s] +preprocess dataset #1: 1%|█▏ | 53/9497 [00:01<04:04, 38.56ex/s] +preprocess dataset #2: 0%| | 5/9497 [00:00<21:51, 7.24ex/s] +preprocess dataset #3: 1%|█▋ | 74/9497 [00:02<03:33, 44.18ex/s] +preprocess dataset #4: 1%|█▏ | 51/9497 [00:01<03:39, 42.96ex/s] +preprocess dataset #5: 0%|▏ | 7/9497 [00:00<15:45, 10.04ex/s] +preprocess dataset #6: 1%|█▎ | 55/9497 [00:01<03:30, 44.83ex/s] +preprocess dataset #7: 0%|▋ | 29/9497 [00:01<04:14, 37.17ex/s] +preprocess dataset #8: 1%|█▋ | 74/9497 [00:02<03:32, 44.29ex/s] 
+preprocess dataset #9: 0%|▋ | 32/9497 [00:01<04:46, 32.99ex/s] +preprocess dataset #10: 1%|█▋ | 72/9497 [00:02<04:24, 35.59ex/s] +preprocess dataset #11: 0%|▉ | 43/9496 [00:01<03:33, 44.37ex/s] +preprocess dataset #12: 0%| | 0/9496 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 09:51:55.266267: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 09:51:55.266356: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File 
"/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 16.18G of 15.48G hbm. Exceeded hbm capacity by 717.52M. +Total hbm usage >= 16.70G: + reserved 530.00M + program 5.38G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 5.38G: + global 180.0K + scoped 72.08M + HLO temp 5.31G (99.0% utilization: Unpadded (5.02G) Padded (5.07G), 4.6% fragmentation (247.75M)) + Largest program allocations in hbm: + 1. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[12,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.194.remat6 = fusion(bitcast.8986, bitcast.8984, fusion.16271), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 234.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,5120]{2,1,0:T(8,128)} + Unpadded size: 234.14M + Extra memory due to padding: 240.0K (1.0x expansion) + XLA label: fusion.1385.remat = fusion(get-tuple-element.34607, fusion.6987.remat2.1, get-tuple-element.34610, get-tuple-element.34611, ...(+3)), kind=kOutput, calls=fused_computation.1219.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[12,999,5120]{2,1,0:T(8,128)(2,1)} + Unpadded size: 117.07M + Extra memory due to padding: 120.0K (1.0x expansion) + XLA label: fusion.27201 = fusion(fusion.1385.remat, get-tuple-element.34582, get-tuple-element.34581, get-tuple-element.34616, ...(+1)), kind=kOutput, calls=fused_computation.20943 + Allocation type: HLO temp + ========================== + 4. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/6/remat(core_fn)/6/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.7665 = fusion(get-tuple-element.9818, get-tuple-element.34155, get-tuple-element.11245, get-tuple-element.34154, ...(+3)), kind=kLoop, calls=fused_computation.7055 + Allocation type: HLO temp + ========================== + 5. Size: 58.59M + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: copy.5651 = copy(bitcast.15721) + Allocation type: HLO temp + ========================== + 6. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22617 = fusion(fusion.7619, copy.5651, get-tuple-element.13649, fusion.10635, ...(+1)), kind=kLoop, calls=fused_computation.20747 + Allocation type: HLO temp + ========================== + 7. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22615 = fusion(get-tuple-element.10888, get-tuple-element.12891, get-tuple-element.12889, get-tuple-element.12893, ...(+5)), kind=kOutput, calls=fused_computation.20745 + Allocation type: HLO temp + ========================== + 8. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22613 = fusion(get-tuple-element.10892, get-tuple-element.12907, get-tuple-element.12905, get-tuple-element.12909, ...(+5)), kind=kOutput, calls=fused_computation.20743 + Allocation type: HLO temp + ========================== + 9. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22611 = fusion(get-tuple-element.10896, get-tuple-element.13083, get-tuple-element.13081, get-tuple-element.13085, ...(+5)), kind=kOutput, calls=fused_computation.20741 + Allocation type: HLO temp + ========================== + 10. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22609 = fusion(get-tuple-element.10900, get-tuple-element.13259, get-tuple-element.13257, get-tuple-element.13261, ...(+5)), kind=kOutput, calls=fused_computation.20739 + Allocation type: HLO temp + ========================== + 11. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22607 = fusion(get-tuple-element.10904, get-tuple-element.13435, get-tuple-element.13433, get-tuple-element.13437, ...(+5)), kind=kOutput, calls=fused_computation.20737 + Allocation type: HLO temp + ========================== + 12. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat3 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12393), kind=kOutput, calls=fused_computation.6377.clone.clone.clone + Allocation type: HLO temp + ========================== + 13. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat2 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12390), kind=kOutput, calls=fused_computation.6377.clone.clone + Allocation type: HLO temp + ========================== + 14. Size: 40.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/pos_conv_embed/conv/conv/rev[dimensions=(0,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=425 + Shape: bf16[128,1280,80]{2,1,0:T(8,128)(2,1)} + Unpadded size: 25.00M + Extra memory due to padding: 15.00M (1.6x expansion) + XLA label: reverse.37400 = reverse(bitcast.2126), dimensions={0} + Allocation type: HLO temp + ========================== + 15. Size: 33.75M + Shape: bf16[12,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 33.02M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.3902.remat_compressed = copy(copy.3902) + Allocation type: HLO temp + ========================== + 16. 
Size: 31.22M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/layer_norm/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=150 + Shape: f32[12,999,512]{2,0,1:T(8,128)} + Unpadded size: 23.41M + Extra memory due to padding: 7.80M (1.3x expansion) + XLA label: fusion.9234.remat = fusion(get-tuple-element.13680, get-tuple-element.13681, copy.3899, copy.3897, ...(+2)), kind=kLoop, calls=fused_computation.8286.clone + Allocation type: HLO temp + ========================== + 17. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/v_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4655.remat2 = copy(fusion.6991.remat3) + Allocation type: HLO temp + ========================== + 18. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4656.remat2 = copy(fusion.6990.remat2) + Allocation type: HLO temp + ========================== + 19. 
Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4657.remat2 = copy(fusion.6988.remat3) + Allocation type: HLO temp + ========================== + 20. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((1,), (3,)), ((0, 2), (0, 1))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[12,16,80,999]{3,2,1,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: fusion.787 = fusion(bitcast.8988, fusion.754, get-tuple-element.11246, bitcast.8986, ...(+2)), kind=kOutput, calls=fused_computation.718 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 16.18G of 15.48G hbm. Exceeded hbm capacity by 717.52M. +Total hbm usage >= 16.70G: + reserved 530.00M + program 5.38G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. 
+Program hbm requirement 5.38G: + global 180.0K + scoped 72.08M + HLO temp 5.31G (99.0% utilization: Unpadded (5.02G) Padded (5.07G), 4.6% fragmentation (247.75M)) + Largest program allocations in hbm: + 1. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[12,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.194.remat6 = fusion(bitcast.8986, bitcast.8984, fusion.16271), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. Size: 234.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,5120]{2,1,0:T(8,128)} + Unpadded size: 234.14M + Extra memory due to padding: 240.0K (1.0x expansion) + XLA label: fusion.1385.remat = fusion(get-tuple-element.34607, fusion.6987.remat2.1, get-tuple-element.34610, get-tuple-element.34611, ...(+3)), kind=kOutput, calls=fused_computation.1219.clone + Allocation type: HLO temp + ========================== + 3. 
Size: 117.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[12,999,5120]{2,1,0:T(8,128)(2,1)} + Unpadded size: 117.07M + Extra memory due to padding: 120.0K (1.0x expansion) + XLA label: fusion.27201 = fusion(fusion.1385.remat, get-tuple-element.34582, get-tuple-element.34581, get-tuple-element.34616, ...(+1)), kind=kOutput, calls=fused_computation.20943 + Allocation type: HLO temp + ========================== + 4. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/6/remat(core_fn)/6/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.7665 = fusion(get-tuple-element.9818, get-tuple-element.34155, get-tuple-element.11245, get-tuple-element.34154, ...(+3)), kind=kLoop, calls=fused_computation.7055 + Allocation type: HLO temp + ========================== + 5. Size: 58.59M + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: copy.5651 = copy(bitcast.15721) + Allocation type: HLO temp + ========================== + 6. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22617 = fusion(fusion.7619, copy.5651, get-tuple-element.13649, fusion.10635, ...(+1)), kind=kLoop, calls=fused_computation.20747 + Allocation type: HLO temp + ========================== + 7. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22615 = fusion(get-tuple-element.10888, get-tuple-element.12891, get-tuple-element.12889, get-tuple-element.12893, ...(+5)), kind=kOutput, calls=fused_computation.20745 + Allocation type: HLO temp + ========================== + 8. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22613 = fusion(get-tuple-element.10892, get-tuple-element.12907, get-tuple-element.12905, get-tuple-element.12909, ...(+5)), kind=kOutput, calls=fused_computation.20743 + Allocation type: HLO temp + ========================== + 9. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22611 = fusion(get-tuple-element.10896, get-tuple-element.13083, get-tuple-element.13081, get-tuple-element.13085, ...(+5)), kind=kOutput, calls=fused_computation.20741 + Allocation type: HLO temp + ========================== + 10. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22609 = fusion(get-tuple-element.10900, get-tuple-element.13259, get-tuple-element.13257, get-tuple-element.13261, ...(+5)), kind=kOutput, calls=fused_computation.20739 + Allocation type: HLO temp + ========================== + 11. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22607 = fusion(get-tuple-element.10904, get-tuple-element.13435, get-tuple-element.13433, get-tuple-element.13437, ...(+5)), kind=kOutput, calls=fused_computation.20737 + Allocation type: HLO temp + ========================== + 12. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat3 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12393), kind=kOutput, calls=fused_computation.6377.clone.clone.clone + Allocation type: HLO temp + ========================== + 13. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat2 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12390), kind=kOutput, calls=fused_computation.6377.clone.clone + Allocation type: HLO temp + ========================== + 14. Size: 40.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/pos_conv_embed/conv/conv/rev[dimensions=(0,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=425 + Shape: bf16[128,1280,80]{2,1,0:T(8,128)(2,1)} + Unpadded size: 25.00M + Extra memory due to padding: 15.00M (1.6x expansion) + XLA label: reverse.37400 = reverse(bitcast.2126), dimensions={0} + Allocation type: HLO temp + ========================== + 15. Size: 33.75M + Shape: bf16[12,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 33.02M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.3902.remat_compressed = copy(copy.3902) + Allocation type: HLO temp + ========================== + 16. 
Size: 31.22M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/layer_norm/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=150 + Shape: f32[12,999,512]{2,0,1:T(8,128)} + Unpadded size: 23.41M + Extra memory due to padding: 7.80M (1.3x expansion) + XLA label: fusion.9234.remat = fusion(get-tuple-element.13680, get-tuple-element.13681, copy.3899, copy.3897, ...(+2)), kind=kLoop, calls=fused_computation.8286.clone + Allocation type: HLO temp + ========================== + 17. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/v_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4655.remat2 = copy(fusion.6991.remat3) + Allocation type: HLO temp + ========================== + 18. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4656.remat2 = copy(fusion.6990.remat2) + Allocation type: HLO temp + ========================== + 19. 
Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4657.remat2 = copy(fusion.6988.remat3) + Allocation type: HLO temp + ========================== + 20. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((1,), (3,)), ((0, 2), (0, 1))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[12,16,80,999]{3,2,1,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: fusion.787 = fusion(bitcast.8988, fusion.754, get-tuple-element.11246, bitcast.8986, ...(+2)), kind=kOutput, calls=fused_computation.718 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/requirements.txt b/wandb/run-20220730_093953-16dexcvn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 
+datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json b/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..42f9a938eb6babc1a922aa94d71039686881e3ae --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T09:39:57.124976", + "startedAt": "2022-07-30T09:39:53.726539", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=12", + "--per_device_eval_batch_size=12", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + 
"--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json b/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f6b533a778379f18be357d7655677b0f2090d573 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 979}} \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log b/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2e5ce16b2c28d4249490f5b2ea7ff82b5d805307 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log @@ -0,0 +1,559 @@ +2022-07-30 09:39:54,635 INFO MainThread:3334444 [internal.py:wandb_internal():87] W&B internal server running at pid: 3334444, started at: 2022-07-30 09:39:54.635261 +2022-07-30 09:39:54,637 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 09:39:54,638 INFO WriterThread:3334444 
[datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb +2022-07-30 09:39:54,638 DEBUG SenderThread:3334444 [sender.py:send():234] send: header +2022-07-30 09:39:54,638 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: check_version +2022-07-30 09:39:54,676 DEBUG SenderThread:3334444 [sender.py:send():234] send: run +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [sender.py:_start_run_threads():804] run started: 16dexcvn with start time 1659173993 +2022-07-30 09:39:54,876 DEBUG SenderThread:3334444 [sender.py:send():234] send: summary +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:39:54,876 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 09:39:55,880 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:__init__():40] meta init +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:__init__():54] meta init done +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:probe():214] probe +2022-07-30 09:39:57,126 DEBUG HandlerThread:3334444 [meta.py:_setup_git():204] setup git +2022-07-30 09:39:57,165 DEBUG HandlerThread:3334444 [meta.py:_setup_git():211] setup git done +2022-07-30 09:39:57,165 DEBUG HandlerThread:3334444 [meta.py:_save_code():92] save code +2022-07-30 09:39:57,178 DEBUG HandlerThread:3334444 [meta.py:_save_code():113] save code done +2022-07-30 09:39:57,178 DEBUG HandlerThread:3334444 [meta.py:_save_patches():130] save patches +2022-07-30 09:39:57,253 DEBUG 
HandlerThread:3334444 [meta.py:_save_patches():172] save patches done +2022-07-30 09:39:57,253 DEBUG HandlerThread:3334444 [meta.py:_save_pip():58] save pip +2022-07-30 09:39:57,253 DEBUG HandlerThread:3334444 [meta.py:_save_pip():72] save pip done +2022-07-30 09:39:57,254 DEBUG HandlerThread:3334444 [meta.py:probe():252] probe done +2022-07-30 09:39:57,257 DEBUG SenderThread:3334444 [sender.py:send():234] send: files +2022-07-30 09:39:57,257 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 09:39:57,257 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 09:39:57,258 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 09:39:57,263 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:39:57,263 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:39:57,708 INFO Thread-11 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/1qaw72tn-wandb-metadata.json +2022-07-30 09:39:57,785 INFO Thread-13 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/2p365bv0-diff.patch +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] 
file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/diff.patch +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code +2022-07-30 09:39:57,944 INFO Thread-12 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/29x3jl29-code/run_flax_speech_recognition_ctc.py +2022-07-30 09:39:59,883 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:01,884 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:03,885 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:05,886 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:11,889 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:12,396 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:12,396 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:13,890 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:40:25,207 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:40:25,895 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:27,532 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:27,533 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:27,896 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:29,898 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:38,902 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:40,902 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:42,683 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:42,684 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:52,907 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:54,908 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:55,285 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:40:57,849 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:57,850 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:13,075 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:13,075 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:25,360 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:41:28,323 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:28,324 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:34,926 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:36,927 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:38,928 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:41,929 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:43,703 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:43,703 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:43,930 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:45,931 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:47,932 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:49,933 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:51,934 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:53,935 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:55,434 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:41:55,936 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:57,937 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:58,867 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:58,868 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:59,939 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:01,939 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:03,940 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:05,941 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:07,942 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:09,943 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:11,944 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:13,945 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:14,016 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:14,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:15,946 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:17,948 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:19,949 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:21,950 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:23,951 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:25,506 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:42:25,952 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:27,953 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:29,150 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:29,151 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:29,954 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:31,955 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:33,956 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:35,957 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:37,958 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:39,959 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:41,960 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:43,961 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:44,296 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:44,296 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:45,962 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:47,963 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:49,964 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:51,964 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:53,965 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:55,587 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:42:55,967 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:57,968 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:59,479 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:59,479 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:59,969 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:01,970 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:03,970 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:05,971 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:07,972 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:09,973 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:11,974 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:13,976 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:14,639 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:14,640 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:15,977 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:17,977 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:19,978 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:21,980 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:23,980 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:25,670 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:43:25,983 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:27,982 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:29,983 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:30,043 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:30,043 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:31,985 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:33,986 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:35,987 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:37,988 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:39,989 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:41,990 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:43,991 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:45,194 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:45,195 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:45,992 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:47,994 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:49,995 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:51,996 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:53,997 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:55,759 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:43:55,999 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:58,000 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:00,001 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:00,353 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:00,353 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:02,003 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:04,004 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:06,005 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:08,006 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:10,007 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:12,008 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:14,009 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:15,500 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:15,500 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:16,010 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:18,011 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:21,013 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:23,014 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:25,015 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:25,837 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:44:27,016 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:29,017 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:30,637 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:30,638 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:31,018 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:33,019 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:35,020 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:37,021 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:39,022 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:41,023 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:43,026 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:45,025 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:45,789 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:45,789 DEBUG SenderThread:3334444 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 09:44:47,033 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:49,034 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:51,035 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:53,036 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:55,038 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:55,924 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:44:57,040 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:59,040 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:00,954 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:00,954 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:01,042 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:03,046 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:05,047 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:07,048 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:09,049 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:11,050 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:13,051 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:15,052 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:16,107 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:16,108 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:17,054 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:19,055 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:21,056 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:23,057 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:25,058 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:26,013 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:45:27,060 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:29,061 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:31,062 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:31,261 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:31,261 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:33,063 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:35,064 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:37,065 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:39,067 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:41,068 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:43,069 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:45,070 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:46,405 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:46,405 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:47,071 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:49,072 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:51,073 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:53,074 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:55,075 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:56,107 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:45:57,076 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:59,077 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:01,078 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:01,561 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:01,562 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:03,079 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:05,080 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:07,081 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:09,082 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:11,083 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:13,084 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:15,085 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:16,711 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 09:46:16,712 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:17,086 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:19,087 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:21,088 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:23,088 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:25,089 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:26,199 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:46:27,090 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:29,091 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:31,092 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:31,856 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:31,857 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:33,093 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:35,094 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:37,095 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:47,022 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:47,022 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:56,275 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:02,166 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:02,166 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:05,106 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:07,107 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:09,108 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:11,109 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:17,304 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:17,305 DEBUG SenderThread:3334444 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 09:47:26,356 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:32,438 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:32,438 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:47,590 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:47,590 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:50,128 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:52,129 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:54,129 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:56,130 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:56,431 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:58,131 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:00,132 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:02,133 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:02,743 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:02,743 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:04,134 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:06,135 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:08,136 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:10,137 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:12,138 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:14,139 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:16,140 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:17,880 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:17,880 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:18,141 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:20,142 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:26,144 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:26,508 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:48:28,145 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:30,147 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:32,148 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:33,017 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:33,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:34,149 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:48,152 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:48,152 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:56,583 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:03,290 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:03,291 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:14,166 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:16,167 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:18,167 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:18,437 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:18,437 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:20,168 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:22,169 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:24,170 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:26,171 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:26,659 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:28,172 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:30,173 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:32,174 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:33,581 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:33,582 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:34,175 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:36,175 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:38,176 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:40,177 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:42,179 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:46,180 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:48,181 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:48,718 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:48,719 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:50,182 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:52,183 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:54,184 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:56,185 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:56,735 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:58,186 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:00,187 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:02,188 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:03,860 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:03,860 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:04,189 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:06,190 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:08,191 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:10,192 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:12,193 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:14,194 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:16,195 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:18,196 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:19,030 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:19,030 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:20,197 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:22,198 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:24,200 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:26,200 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log 
+2022-07-30 09:50:26,820 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:50:28,201 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:30,202 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:32,203 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:34,181 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:34,182 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:34,204 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:49,313 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:49,314 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:56,900 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:04,449 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:04,450 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:17,224 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:19,623 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:19,623 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: 
stop_status +2022-07-30 09:51:23,226 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:25,227 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:26,967 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:32,230 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:34,923 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:34,923 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:40,233 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:47,237 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:50,342 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:50,342 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:55,240 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:57,042 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:57,241 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:01,243 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:05,245 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:05,856 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:05,856 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:07,246 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:13,249 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:21,214 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:21,214 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:27,116 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:52:36,379 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:36,380 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:42,261 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:51,264 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:51,562 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:51,562 DEBUG SenderThread:3334444 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:53,265 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:57,191 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:53:06,734 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:06,734 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:21,882 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:21,882 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:27,266 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:53:37,016 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:37,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:52,149 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:52,149 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:57,341 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:54:07,285 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:07,286 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:22,442 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:22,442 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:27,412 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:54:37,580 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:37,581 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:52,720 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:52,720 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:57,486 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:55:07,859 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:07,860 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:23,001 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:23,001 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:27,564 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:55:38,143 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:38,144 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:53,278 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:53,278 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:57,809 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:56:08,417 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:56:08,418 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:56:13,380 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:14,385 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:14,385 DEBUG SenderThread:3334444 [sender.py:send():234] send: telemetry +2022-07-30 09:56:14,386 DEBUG SenderThread:3334444 [sender.py:send():234] send: exit +2022-07-30 09:56:14,386 INFO SenderThread:3334444 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 09:56:14,388 INFO SenderThread:3334444 [sender.py:send_exit():368] handling runtime: 979 +2022-07-30 09:56:14,388 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:56:14,389 INFO SenderThread:3334444 [sender.py:send_exit():374] send defer +2022-07-30 09:56:14,389 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:14,390 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,390 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 09:56:14,390 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,390 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 09:56:14,390 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 1 +2022-07-30 09:56:14,390 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,391 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 09:56:14,451 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,451 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 09:56:14,451 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 2 +2022-07-30 09:56:14,452 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:56:14,452 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,452 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 09:56:14,453 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,453 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 09:56:14,453 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 3 +2022-07-30 09:56:14,453 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,453 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 09:56:14,453 DEBUG SenderThread:3334444 [sender.py:send():234] send: summary +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:56:14,454 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 4 +2022-07-30 09:56:14,454 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,454 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 09:56:14,454 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 09:56:14,492 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:14,617 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 5 +2022-07-30 09:56:14,618 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 
09:56:14,618 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,618 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 09:56:14,618 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,618 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 09:56:14,619 INFO SenderThread:3334444 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 09:56:14,719 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,381 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:15,381 INFO SenderThread:3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:56:15,381 INFO SenderThread:3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml config.yaml +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/diff.patch diff.patch +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt requirements.txt +2022-07-30 09:56:15,382 INFO SenderThread:3334444 
[dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log output.log +2022-07-30 09:56:15,385 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json wandb-summary.json +2022-07-30 09:56:15,386 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json wandb-metadata.json +2022-07-30 09:56:15,389 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 09:56:15,389 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 6 +2022-07-30 09:56:15,389 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,395 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:15,395 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 09:56:15,395 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:15,395 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 09:56:15,395 INFO SenderThread:3334444 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 09:56:15,493 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,493 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,595 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,595 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,696 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,696 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,798 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,798 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,860 INFO Thread-15 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt +2022-07-30 09:56:15,860 INFO Thread-14 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml +2022-07-30 09:56:15,864 INFO Thread-17 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:56:15,899 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,899 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,001 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,001 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,102 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,103 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,139 INFO Thread-16 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:16,204 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,204 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 
09:56:16,305 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,306 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,339 INFO Thread-7 :3334444 [sender.py:transition_state():387] send defer: 7 +2022-07-30 09:56:16,340 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,340 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 09:56:16,340 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,340 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 09:56:16,407 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,780 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 8 +2022-07-30 09:56:16,780 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,781 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,781 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 09:56:16,781 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,781 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 09:56:16,781 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 9 +2022-07-30 09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send():234] send: final +2022-07-30 09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send():234] send: footer +2022-07-30 09:56:16,782 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,782 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 
09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,782 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 09:56:16,882 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,882 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,882 INFO SenderThread:3334444 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 09:56:17,140 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 09:56:17,141 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 09:56:17,141 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 09:56:17,141 INFO HandlerThread:3334444 [handler.py:finish():731] shutting down handler +2022-07-30 09:56:17,782 INFO WriterThread:3334444 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [sender.py:finish():1070] shutting down sender +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 09:56:18,142 INFO MainThread:3334444 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_093953-16dexcvn/logs/debug.log b/wandb/run-20220730_093953-16dexcvn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..84df8b2c6657d2d40292d619d99e980e38dd7d14 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/logs/debug.log @@ -0,0 +1,157 @@ +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} 
+2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/logs/debug.log +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():404] calling init triggers +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():460] starting backend +2022-07-30 09:39:53,728 INFO MainThread:3333166 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 09:39:53,775 INFO MainThread:3333166 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 09:39:53,821 INFO MainThread:3333166 [backend.py:ensure_launched():221] started backend process with pid: 3334444 +2022-07-30 09:39:53,823 INFO MainThread:3333166 [wandb_init.py:init():469] backend started and connected +2022-07-30 09:39:53,837 INFO MainThread:3333166 [wandb_init.py:init():533] updated telemetry +2022-07-30 09:39:53,952 INFO MainThread:3333166 [wandb_init.py:init():563] communicating current version +2022-07-30 09:39:54,674 INFO MainThread:3333166 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 09:39:54,674 INFO MainThread:3333166 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 09:39:54,876 INFO MainThread:3333166 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 09:39:57,262 INFO MainThread:3333166 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 09:39:57,262 INFO MainThread:3333166 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 09:39:57,263 INFO MainThread:3333166 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 09:39:57,265 INFO MainThread:3333166 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 09:39:57,265 INFO MainThread:3333166 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 09:56:12,210 INFO MainThread:3333166 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 09:56:12,216 INFO MainThread:3333166 [wandb_run.py:_restore():1752] restore +2022-07-30 09:56:14,390 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 09:56:14,618 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 09:56:15,392 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 375734 +} + +2022-07-30 09:56:15,494 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 375762 +} + +2022-07-30 09:56:15,595 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,697 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,798 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,900 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,002 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,103 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,205 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,306 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,781 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:17,139 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} +local_info { +} + +2022-07-30 09:56:18,716 INFO MainThread:3333166 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb b/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d52e071e7cfc7360c4146cb58590c19398708948 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea59404e0e117ed431b8d34e151d0ec69f1ba66330a1ea2ea4d8a2bdad59c8ff +size 418608 diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. 
Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml b/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc145b9e10df828754a63ef17390f3f5ec5573f7 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml @@ -0,0 +1,27 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659179874 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch b/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs 
-text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/output.log b/wandb/run-20220730_111754-bhdpxdi4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..5416b7792aa2700437aa8a0b76949f8e9e74e2c6 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/output.log @@ -0,0 +1,736 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-17-50_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, 
+lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 48.08it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 445.81it/s] +WARNING:datasets.arrow_dataset:Loading 
cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'weight_proj', 'bias'), ('project_hid', 'bias'), ('project_q', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8750.90ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8172.52ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8842.39ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7248.79ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8055.10ex/s] +removing punctuation 
from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7504.68ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7551.02ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8173.02ex/s] +removing punctuation from train split #7: 66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6262/9523 [00:00<00:00, 7376.88ex/s] +removing punctuation from train split #5: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9056/9523 [00:01<00:00, 8176.29ex/s] +removing punctuation from train split #6: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8222/9523 [00:01<00:00, 7796.95ex/s] +removing punctuation from train split #8: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7495/9523 [00:00<00:00, 8188.42ex/s] +removing punctuation from train split #6: 
95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9068/9523 [00:01<00:00, 7989.38ex/s] +removing punctuation from train split #7: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8440/9523 [00:01<00:00, 6953.64ex/s] +removing punctuation from train split #8: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9191/9523 [00:01<00:00, 8334.11ex/s] +removing punctuation from train split #10: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7893/9523 [00:00<00:00, 9072.25ex/s] +removing punctuation from train split #10: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8860/9523 [00:01<00:00, 9254.14ex/s] +removing punctuation from train split #9: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9030/9523 [00:01<00:00, 8244.93ex/s] +removing punctuation from train split #15: 42%|██████████████████████████████████████████████████████████████████████████████████▍ | 4004/9522 [00:00<00:00, 8124.94ex/s] +removing punctuation from train split #11: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8200/9523 [00:01<00:00, 8162.87ex/s] 
+removing punctuation from train split #12: 78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7420/9522 [00:00<00:00, 8095.21ex/s] +removing punctuation from train split #15: 60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5667/9522 [00:00<00:00, 8223.08ex/s] +removing punctuation from train split #16: 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5595/9522 [00:00<00:00, 8108.71ex/s] +removing punctuation from train split #17: 50%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4795/9522 [00:00<00:00, 8092.70ex/s] +removing punctuation from train split #18: 40%|███████████████████████████████████████████████████████████████████████████████ | 3844/9522 [00:00<00:00, 6126.31ex/s] +removing punctuation from train split #17: 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5635/9522 [00:00<00:00, 8191.52ex/s] +removing punctuation from train split #20: 34%|██████████████████████████████████████████████████████████████████▊ | 3245/9522 [00:00<00:00, 8190.98ex/s] +removing punctuation from train split #21: 34%|█████████████████████████████████████████████████████████████████▋ | 3193/9522 [00:00<00:00, 8091.29ex/s] +removing punctuation from train split #20: 43%|███████████████████████████████████████████████████████████████████████████████████▉ | 4080/9522 [00:00<00:00, 8246.49ex/s] +removing punctuation from train split #23: 17%|█████████████████████████████████▏ | 1613/9522 [00:00<00:00, 8106.45ex/s] +removing punctuation from train split #22: 34%|███████████████████████████████████████████████████████████████████▏ | 3266/9522 [00:00<00:00, 8245.94ex/s] +removing punctuation from train 
split #23: 26%|██████████████████████████████████████████████████▋ | 2460/9522 [00:00<00:00, 8268.25ex/s] +removing punctuation from train split #26: 8%|███████████████▌ | 755/9522 [00:00<00:01, 7549.31ex/s] +removing punctuation from train split #20: 69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6571/9522 [00:00<00:00, 7735.32ex/s] +removing punctuation from train split #18: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7562/9522 [00:01<00:00, 7402.62ex/s] +removing punctuation from train split #20: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7440/9522 [00:00<00:00, 8018.52ex/s] +removing punctuation from train split #18: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8471/9522 [00:01<00:00, 7895.82ex/s] +removing punctuation from train split #18: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9426/9522 [00:01<00:00, 8382.41ex/s] +removing punctuation from train split #20: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8335/9522 [00:01<00:00, 8296.36ex/s] +removing punctuation from train split #20: 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9281/9522 [00:01<00:00, 
8643.22ex/s] +removing punctuation from train split #22: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7462/9522 [00:00<00:00, 7997.95ex/s] +removing punctuation from train split #22: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8377/9522 [00:01<00:00, 8340.55ex/s] +removing punctuation from train split #21: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9196/9522 [00:01<00:00, 6344.71ex/s] +removing punctuation from train split #22: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9217/9522 [00:01<00:00, 7164.89ex/s] +removing punctuation from train split #23: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8472/9522 [00:01<00:00, 7128.14ex/s] +removing punctuation from train split #24: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7478/9522 [00:01<00:00, 6169.29ex/s] +removing punctuation from train split #23: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9404/9522 [00:01<00:00, 7711.35ex/s] +removing punctuation from train split #24: 
88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8390/9522 [00:01<00:00, 6894.92ex/s] +removing punctuation from train split #24: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9311/9522 [00:01<00:00, 7495.91ex/s] +removing punctuation from train split #25: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8820/9522 [00:01<00:00, 8213.57ex/s] +removing punctuation from train split #26: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9392/9522 [00:01<00:00, 8332.47ex/s] +removing punctuation from train split #27: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8543/9522 [00:01<00:00, 8777.93ex/s] +removing punctuation from train split #28: 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6775/9522 [00:00<00:00, 8187.29ex/s] +removing punctuation from train split #29: 55%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5277/9522 [00:00<00:00, 7516.57ex/s] +removing punctuation from train split #27: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 9506/9522 
[00:01<00:00, 9030.56ex/s] +removing punctuation from train split #29: 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6242/9522 [00:00<00:00, 8130.78ex/s] +removing punctuation from train split #28: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌| 9501/9522 [00:01<00:00, 8573.55ex/s] +removing punctuation from train split #29: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8184/9522 [00:00<00:00, 8922.41ex/s] +removing punctuation from train split #29: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9148/9522 [00:01<00:00, 9134.35ex/s] +removing punctuation from train split #31: 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6397/9522 [00:00<00:00, 8283.49ex/s] +removing punctuation from train split #30: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9204/9522 [00:01<00:00, 8400.18ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow9285/9522 [00:01<00:00, 9150.66ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|▏ | 10/9497 [00:01<10:51, 14.55ex/s] +preprocess dataset #1: 0%| | 0/9497 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` 
(necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_113845-2hglxdx5/files/config.yaml b/wandb/run-20220730_113845-2hglxdx5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4466091c28ac9a503d2d5308e13c26a93862e56e --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659181126 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_113845-2hglxdx5/files/diff.patch b/wandb/run-20220730_113845-2hglxdx5/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/output.log b/wandb/run-20220730_113845-2hglxdx5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2073ab99f98da47dd8fec496d9d1dd48b339d758 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/output.log @@ -0,0 +1,1628 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-38-41_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.80it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
457.11it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 5%|██████████▋ | 514/9523 [00:00<00:01, 5134.30ex/s] +removing punctuation from train split #1: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 11:50:54.715424: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 11:50:54.715480: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 24 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 192 +INFO:__main__: Total optimization steps = 63040 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 15.59G of 15.48G hbm. Exceeded hbm capacity by 108.18M. +Total hbm usage >= 16.11G: + reserved 530.00M + program 8.36G + arguments 7.23G +Output size 7.17G; shares 7.17G with arguments. +Program hbm requirement 8.36G: + global 260.0K + scoped 69.20M + HLO temp 7.31G (98.6% utilization: Unpadded (6.97G) Padded (7.07G), 3.3% fragmentation (246.53M)) + overlays 1003.68M + Largest program allocations in hbm: + 1. Size: 1003.68M + XLA label: overlays + Allocation type: overlays + ========================== + 2. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.23118 = fusion(fusion.15347, bitcast.675, bitcast.677), kind=kOutput, calls=fused_computation.21200 + Allocation type: HLO temp + ========================== + 3. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.372 = fusion(get-tuple-element.11691, get-tuple-element.11690, fusion.13842, bitcast.685, ...(+1)), kind=kOutput, calls=fused_computation.371 + Allocation type: HLO temp + ========================== + 4. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/mul" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=107 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.371 = fusion(get-tuple-element.11695, get-tuple-element.11691, get-tuple-element.11690, negate.140, ...(+1)), kind=kLoop, calls=fused_computation.370 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/dropout/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/stochastic.py" source_line=69 + Shape: u32[12,999,1280]{2,0,1:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 19.51M (1.3x expansion) + XLA label: fusion.6015 = fusion(xor.5514, bitcast.32, fusion.5702, bitcast.31, ...(+3)), kind=kLoop, calls=fused_computation.5858 + Allocation type: HLO temp + ========================== + 6. Size: 67.50M + Shape: bf16[24,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 66.04M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.7694.remat_compressed = copy(copy.7694) + Allocation type: HLO temp + ========================== + 7. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8152.remat = copy(fusion.7393) + Allocation type: HLO temp + ========================== + 8. 
Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8151.remat = copy(fusion.7395) + Allocation type: HLO temp + ========================== + 9. Size: 58.59M + Shape: u32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.5948.remat_compressed = copy(fusion.5948) + Allocation type: HLO temp + ========================== + 10. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/25/25/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4030 = fusion(get-tuple-element.11572, convert.3623, convert.3624, fusion.3050), kind=kOutput, calls=fused_computation.3876 + Allocation type: HLO temp + ========================== + 11. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4186 = fusion(fusion.6213, copy.9974, convert.4093, fusion.5973, ...(+1)), kind=kLoop, calls=fused_computation.4032 + Allocation type: HLO temp + ========================== + 12. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3934 = fusion(get-tuple-element.11636, convert.3911, convert.3912, fusion.3082), kind=kOutput, calls=fused_computation.3780 + Allocation type: HLO temp + ========================== + 13. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3940 = fusion(get-tuple-element.11632, convert.3895, convert.3896, fusion.3080), kind=kOutput, calls=fused_computation.3786 + Allocation type: HLO temp + ========================== + 14. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4126 = fusion(get-tuple-element.11508, convert.4087, convert.4088, fusion.3018), kind=kOutput, calls=fused_computation.3972 + Allocation type: HLO temp + ========================== + 15. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3946 = fusion(get-tuple-element.11628, convert.3863, convert.3864, fusion.3078), kind=kOutput, calls=fused_computation.3792 + Allocation type: HLO temp + ========================== + 16. Size: 58.54M + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: copy.9974 = copy(bitcast.9495) + Allocation type: HLO temp + ========================== + 17. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/23/23/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4042 = fusion(get-tuple-element.11564, convert.3591, convert.3592, fusion.3046), kind=kOutput, calls=fused_computation.3888 + Allocation type: HLO temp + ========================== + 18. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4138 = fusion(get-tuple-element.11500, convert.4055, convert.4056, fusion.3014), kind=kOutput, calls=fused_computation.3984 + Allocation type: HLO temp + ========================== + 19. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/42/42/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3928 = fusion(get-tuple-element.11640, convert.3927, convert.3928, fusion.3084), kind=kOutput, calls=fused_computation.3774 + Allocation type: HLO temp + ========================== + 20. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4120 = fusion(get-tuple-element.11512, convert.3367, convert.3368, fusion.3020), kind=kOutput, calls=fused_computation.3966 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. 
+-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 15.59G of 15.48G hbm. Exceeded hbm capacity by 108.18M. +Total hbm usage >= 16.11G: + reserved 530.00M + program 8.36G + arguments 7.23G +Output size 7.17G; shares 7.17G with arguments. +Program hbm requirement 8.36G: + global 260.0K + scoped 69.20M + HLO temp 7.31G (98.6% utilization: Unpadded (6.97G) Padded (7.07G), 3.3% fragmentation (246.53M)) + overlays 1003.68M + Largest program allocations in hbm: + 1. Size: 1003.68M + XLA label: overlays + Allocation type: overlays + ========================== + 2. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.23118 = fusion(fusion.15347, bitcast.675, bitcast.677), kind=kOutput, calls=fused_computation.21200 + Allocation type: HLO temp + ========================== + 3. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.372 = fusion(get-tuple-element.11691, get-tuple-element.11690, fusion.13842, bitcast.685, ...(+1)), kind=kOutput, calls=fused_computation.371 + Allocation type: HLO temp + ========================== + 4. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/mul" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=107 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.371 = fusion(get-tuple-element.11695, get-tuple-element.11691, get-tuple-element.11690, negate.140, ...(+1)), kind=kLoop, calls=fused_computation.370 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/dropout/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/stochastic.py" source_line=69 + Shape: u32[12,999,1280]{2,0,1:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 19.51M (1.3x expansion) + XLA label: fusion.6015 = fusion(xor.5514, bitcast.32, fusion.5702, bitcast.31, ...(+3)), kind=kLoop, calls=fused_computation.5858 + Allocation type: HLO temp + ========================== + 6. 
Size: 67.50M + Shape: bf16[24,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 66.04M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.7694.remat_compressed = copy(copy.7694) + Allocation type: HLO temp + ========================== + 7. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8152.remat = copy(fusion.7393) + Allocation type: HLO temp + ========================== + 8. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8151.remat = copy(fusion.7395) + Allocation type: HLO temp + ========================== + 9. Size: 58.59M + Shape: u32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.5948.remat_compressed = copy(fusion.5948) + Allocation type: HLO temp + ========================== + 10. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/25/25/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4030 = fusion(get-tuple-element.11572, convert.3623, convert.3624, fusion.3050), kind=kOutput, calls=fused_computation.3876 + Allocation type: HLO temp + ========================== + 11. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4186 = fusion(fusion.6213, copy.9974, convert.4093, fusion.5973, ...(+1)), kind=kLoop, calls=fused_computation.4032 + Allocation type: HLO temp + ========================== + 12. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3934 = fusion(get-tuple-element.11636, convert.3911, convert.3912, fusion.3082), kind=kOutput, calls=fused_computation.3780 + Allocation type: HLO temp + ========================== + 13. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3940 = fusion(get-tuple-element.11632, convert.3895, convert.3896, fusion.3080), kind=kOutput, calls=fused_computation.3786 + Allocation type: HLO temp + ========================== + 14. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4126 = fusion(get-tuple-element.11508, convert.4087, convert.4088, fusion.3018), kind=kOutput, calls=fused_computation.3972 + Allocation type: HLO temp + ========================== + 15. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3946 = fusion(get-tuple-element.11628, convert.3863, convert.3864, fusion.3078), kind=kOutput, calls=fused_computation.3792 + Allocation type: HLO temp + ========================== + 16. 
Size: 58.54M + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: copy.9974 = copy(bitcast.9495) + Allocation type: HLO temp + ========================== + 17. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/23/23/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4042 = fusion(get-tuple-element.11564, convert.3591, convert.3592, fusion.3046), kind=kOutput, calls=fused_computation.3888 + Allocation type: HLO temp + ========================== + 18. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4138 = fusion(get-tuple-element.11500, convert.4055, convert.4056, fusion.3014), kind=kOutput, calls=fused_computation.3984 + Allocation type: HLO temp + ========================== + 19. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/42/42/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3928 = fusion(get-tuple-element.11640, convert.3927, convert.3928, fusion.3084), kind=kOutput, calls=fused_computation.3774 + Allocation type: HLO temp + ========================== + 20. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4120 = fusion(get-tuple-element.11512, convert.3367, convert.3368, fusion.3020), kind=kOutput, calls=fused_computation.3966 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt b/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 
+cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json b/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f621797030f9d0f561a2f1d05f7913e14e57cce2 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T11:38:49.614640", + "startedAt": "2022-07-30T11:38:45.979578", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=24", + "--per_device_eval_batch_size=24", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json b/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0bcd2640a40d13eaefc952e887ce52089db5ac91 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1019}} \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log b/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7e039a76dd1c062d1d83f1ab0533cb02cea5527f --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log @@ -0,0 +1,577 @@ +2022-07-30 11:38:46,907 INFO MainThread:3087125 [internal.py:wandb_internal():87] W&B internal server running at pid: 3087125, started at: 2022-07-30 11:38:46.906982 +2022-07-30 11:38:46,909 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 11:38:46,909 INFO 
WriterThread:3087125 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb +2022-07-30 11:38:46,910 DEBUG SenderThread:3087125 [sender.py:send():234] send: header +2022-07-30 11:38:46,910 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: check_version +2022-07-30 11:38:46,947 DEBUG SenderThread:3087125 [sender.py:send():234] send: run +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [sender.py:_start_run_threads():804] run started: 2hglxdx5 with start time 1659181126 +2022-07-30 11:38:47,301 DEBUG SenderThread:3087125 [sender.py:send():234] send: summary +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:38:47,302 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 11:38:48,304 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:__init__():40] meta init +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:__init__():54] meta init done +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:probe():214] probe +2022-07-30 11:38:49,616 DEBUG HandlerThread:3087125 [meta.py:_setup_git():204] setup git +2022-07-30 11:38:49,653 DEBUG HandlerThread:3087125 [meta.py:_setup_git():211] setup git done +2022-07-30 11:38:49,653 DEBUG HandlerThread:3087125 [meta.py:_save_code():92] save code +2022-07-30 11:38:49,666 DEBUG HandlerThread:3087125 [meta.py:_save_code():113] save code done +2022-07-30 11:38:49,666 DEBUG HandlerThread:3087125 [meta.py:_save_patches():130] save patches +2022-07-30 
11:38:49,740 DEBUG HandlerThread:3087125 [meta.py:_save_patches():172] save patches done +2022-07-30 11:38:49,740 DEBUG HandlerThread:3087125 [meta.py:_save_pip():58] save pip +2022-07-30 11:38:49,741 DEBUG HandlerThread:3087125 [meta.py:_save_pip():72] save pip done +2022-07-30 11:38:49,741 DEBUG HandlerThread:3087125 [meta.py:probe():252] probe done +2022-07-30 11:38:49,744 DEBUG SenderThread:3087125 [sender.py:send():234] send: files +2022-07-30 11:38:49,744 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 11:38:49,744 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 11:38:49,745 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 11:38:49,750 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:38:49,750 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:38:50,213 INFO Thread-11 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/fzrkrmxo-wandb-metadata.json +2022-07-30 11:38:50,278 INFO Thread-13 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/2c98i3yk-diff.patch +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/diff.patch +2022-07-30 11:38:50,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code +2022-07-30 11:38:50,435 INFO Thread-12 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/11tmoa1l-code/run_flax_speech_recognition_ctc.py +2022-07-30 11:38:52,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:54,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:56,308 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:58,309 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:04,312 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:04,903 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:04,903 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:06,313 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:17,697 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:39:20,035 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:20,035 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:20,319 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:22,320 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:33,326 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:35,185 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:35,185 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:35,327 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:37,328 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:47,773 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:39:49,333 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:50,318 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:50,319 DEBUG SenderThread:3087125 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:51,334 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:05,481 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:05,481 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:17,851 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:40:20,655 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:20,655 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:32,352 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:35,353 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:35,834 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:35,834 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:37,354 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:39,355 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:41,356 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:43,357 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:45,357 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:47,358 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:47,918 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:40:49,359 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:51,011 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:51,011 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:51,360 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:53,361 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:55,362 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:57,363 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:59,364 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:01,365 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:03,366 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:05,367 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:06,165 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:06,165 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:07,368 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:09,368 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:11,369 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:13,370 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:15,372 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:17,373 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:18,003 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:41:19,373 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:21,320 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:21,320 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:21,374 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:23,375 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:25,376 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:27,377 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:29,378 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:31,379 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:33,380 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:35,381 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:36,463 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:36,463 
DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:37,382 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:39,383 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:41,384 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:43,385 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:45,386 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:47,387 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:48,082 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:41:49,389 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:51,390 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:51,606 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:51,606 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:53,391 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:55,392 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:57,393 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:59,394 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:01,395 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:03,402 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:05,397 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:06,746 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:06,746 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:07,398 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:09,400 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:11,400 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:42:13,401 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:15,402 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:17,403 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:18,159 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:42:19,404 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:21,405 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:21,926 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:21,926 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:23,406 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:25,407 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:27,408 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:29,409 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:42:31,410 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:33,411 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:35,412 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:37,069 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:37,070 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:37,413 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:39,414 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:41,415 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:43,416 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:45,417 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:47,417 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:48,248 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 
11:42:50,419 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:52,212 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:52,212 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:52,420 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:54,421 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:56,422 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:58,423 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:00,424 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:02,425 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:04,426 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:06,427 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:07,373 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 11:43:07,373 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:08,429 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:10,430 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:12,430 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:14,432 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:16,432 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:18,340 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:43:18,433 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:20,435 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:22,436 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:22,519 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:22,519 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:24,437 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:26,438 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:28,438 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:30,440 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:32,441 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:34,442 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:36,443 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:37,681 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:37,681 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:38,444 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:40,445 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:42,450 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:44,451 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:46,451 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:48,428 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:43:48,452 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:50,453 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:52,454 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:52,858 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:52,858 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:54,455 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:56,456 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:58,457 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:00,458 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:02,459 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:04,460 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:06,461 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:08,010 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:08,011 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:08,462 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:10,463 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:12,464 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:14,465 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:16,466 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:18,467 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:44:18,507 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:44:20,468 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:22,469 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:23,154 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:23,155 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:24,470 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:26,472 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:28,472 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:30,474 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:32,474 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:34,475 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:36,479 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:44:38,297 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:38,298 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:38,480 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:40,480 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:42,481 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:44,482 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:46,483 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:48,484 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:48,597 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:44:50,485 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:52,486 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:53,438 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:53,438 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 11:44:54,487 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:56,488 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:58,489 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:00,490 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:02,491 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:04,492 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:06,493 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:08,494 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:08,579 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:08,579 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:10,495 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:12,496 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:14,497 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:16,498 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:18,499 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:18,676 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:45:20,500 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:22,501 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:23,730 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:23,730 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:24,502 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:26,503 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:28,504 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:30,506 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:32,506 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:34,507 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:36,508 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:38,509 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:38,869 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:38,869 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:40,510 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:42,511 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:44,512 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:46,513 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:48,516 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:48,757 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:45:50,517 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:52,518 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:54,005 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:54,006 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:54,519 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:56,521 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:58,521 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:00,522 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:02,523 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:04,524 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:06,525 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:09,152 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:09,190 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:18,828 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:46:24,330 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:24,330 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:39,508 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:39,508 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:43,543 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:45,544 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:47,551 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:48,896 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:46:49,551 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:51,553 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:53,553 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:54,766 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:54,766 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:55,554 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:57,561 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:59,562 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:01,563 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:03,564 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:05,564 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:07,565 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:09,566 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:09,928 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:09,928 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 11:47:11,567 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:13,568 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:15,569 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:17,570 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:18,970 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:47:19,571 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:21,574 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:23,575 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:25,072 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:25,072 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:47:40,207 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:40,208 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:47:49,048 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:47:55,679 DEBUG 
HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:55,679 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:04,593 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:06,593 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:08,594 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:10,595 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:10,936 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:10,936 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:12,596 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:14,597 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:16,598 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:18,599 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:19,123 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats 
+2022-07-30 11:48:20,601 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:22,602 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:24,603 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:26,091 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:26,091 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:26,604 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:28,605 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:30,606 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:32,607 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:34,608 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:38,610 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:40,611 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:41,259 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:41,259 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:42,613 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:44,613 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:46,614 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:48,615 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:49,207 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:48:50,616 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:52,617 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:54,618 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:56,410 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:56,410 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:56,619 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:58,620 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:00,621 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:02,622 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:04,623 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:06,624 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:08,627 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:10,628 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:11,555 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:11,555 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:12,629 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:14,631 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:16,631 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:18,632 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:19,279 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:49:20,633 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:22,634 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:24,635 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:26,637 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:26,749 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:26,750 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:41,888 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:41,889 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:49,351 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:49:57,024 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:57,024 DEBUG SenderThread:3087125 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:11,655 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:12,314 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:12,314 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:19,422 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:50:22,660 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:27,572 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:27,572 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:28,663 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:36,667 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:38,668 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:42,895 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:42,895 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:44,670 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:49,497 DEBUG SenderThread:3087125 
[sender.py:send():234] send: stats +2022-07-30 11:50:55,675 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:57,676 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:58,125 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:58,125 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:01,679 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:03,680 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:05,681 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:13,638 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:13,638 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:13,684 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:19,571 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:51:28,990 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:28,990 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:44,191 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:44,192 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:49,645 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:51:57,703 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:59,375 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:59,375 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:59,704 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:05,707 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:07,708 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:14,540 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:14,541 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:52:19,717 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:52:29,807 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:29,808 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:52:45,798 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:45,798 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 11:52:49,791 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:53:00,937 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:00,937 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:16,078 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:16,078 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:19,866 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:53:31,211 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:31,211 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:46,345 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:46,345 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:49,945 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:54:01,479 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:01,480 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:16,617 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:16,618 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:20,021 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:54:31,751 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:31,752 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:46,896 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:46,897 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:50,094 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:02,034 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:02,035 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:17,175 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:17,175 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:20,168 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:32,318 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:32,318 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:45,805 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:46,764 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:46,764 DEBUG SenderThread:3087125 [sender.py:send():234] send: telemetry +2022-07-30 11:55:46,765 DEBUG SenderThread:3087125 [sender.py:send():234] send: exit +2022-07-30 11:55:46,765 INFO SenderThread:3087125 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:send_exit():368] handling runtime: 1019 +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:send_exit():374] send defer +2022-07-30 11:55:46,766 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-30 11:55:46,767 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,767 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 11:55:46,767 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,767 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 11:55:46,767 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 1 +2022-07-30 11:55:46,767 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,767 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 11:55:46,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:46,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:46,834 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,834 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 11:55:46,834 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 2 +2022-07-30 11:55:46,834 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,835 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:46,835 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 11:55:46,835 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,835 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 
11:55:46,835 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 3 +2022-07-30 11:55:46,836 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,836 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 11:55:46,836 DEBUG SenderThread:3087125 [sender.py:send():234] send: summary +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:55:46,836 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 4 +2022-07-30 11:55:46,836 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,836 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 11:55:46,837 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,837 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 11:55:46,869 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:46,995 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 5 +2022-07-30 11:55:46,995 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:46,995 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,995 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 11:55:46,996 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,996 INFO SenderThread:3087125 [sender.py:send_request_defer():383] 
handle sender defer: 5 +2022-07-30 11:55:46,996 INFO SenderThread:3087125 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 11:55:47,096 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:47,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml +2022-07-30 11:55:47,806 INFO SenderThread:3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml config.yaml +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/diff.patch diff.patch +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt requirements.txt +2022-07-30 11:55:47,808 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log output.log +2022-07-30 11:55:47,810 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json wandb-summary.json +2022-07-30 11:55:47,811 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json wandb-metadata.json +2022-07-30 11:55:47,814 INFO SenderThread:3087125 
[dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 11:55:47,814 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 6 +2022-07-30 11:55:47,814 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:47,817 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:47,817 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 11:55:47,820 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:47,820 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 11:55:47,820 INFO SenderThread:3087125 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 11:55:47,918 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:47,918 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,020 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,020 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,121 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,121 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,223 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,223 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,284 INFO Thread-14 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml +2022-07-30 
11:55:48,286 INFO Thread-17 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:48,324 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,325 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,348 INFO Thread-15 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt +2022-07-30 11:55:48,426 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,426 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,496 INFO Thread-16 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:48,528 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,528 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,630 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,630 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,696 INFO Thread-7 :3087125 [sender.py:transition_state():387] send defer: 7 +2022-07-30 11:55:48,696 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:48,697 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 11:55:48,697 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:48,697 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 11:55:48,731 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:49,112 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 8 +2022-07-30 11:55:49,112 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:49,112 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:49,112 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:49,113 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 11:55:49,113 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 9 +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send():234] send: final +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send():234] send: footer +2022-07-30 11:55:49,113 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:49,113 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 11:55:49,114 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:49,114 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 11:55:49,213 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:49,213 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:49,214 INFO SenderThread:3087125 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 11:55:49,472 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 11:55:49,473 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 11:55:49,474 DEBUG 
HandlerThread:3087125 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 11:55:49,474 INFO HandlerThread:3087125 [handler.py:finish():731] shutting down handler +2022-07-30 11:55:50,114 INFO WriterThread:3087125 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [sender.py:finish():1070] shutting down sender +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 11:55:50,474 INFO MainThread:3087125 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_113845-2hglxdx5/logs/debug.log b/wandb/run-20220730_113845-2hglxdx5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5192a2616aed5fe7cb6304557316b8289bf41b74 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/logs/debug.log +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():404] calling init triggers +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():460] starting 
backend +2022-07-30 11:38:45,981 INFO MainThread:3085831 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 11:38:46,030 INFO MainThread:3085831 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 11:38:46,076 INFO MainThread:3085831 [backend.py:ensure_launched():221] started backend process with pid: 3087125 +2022-07-30 11:38:46,078 INFO MainThread:3085831 [wandb_init.py:init():469] backend started and connected +2022-07-30 11:38:46,092 INFO MainThread:3085831 [wandb_init.py:init():533] updated telemetry +2022-07-30 11:38:46,206 INFO MainThread:3085831 [wandb_init.py:init():563] communicating current version +2022-07-30 11:38:46,946 INFO MainThread:3085831 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 11:38:46,946 INFO MainThread:3085831 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 11:38:47,302 INFO MainThread:3085831 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 11:38:49,748 INFO MainThread:3085831 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 11:38:49,749 INFO MainThread:3085831 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 11:38:49,749 INFO MainThread:3085831 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 11:38:49,751 INFO MainThread:3085831 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 11:38:49,752 INFO MainThread:3085831 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 11:55:44,218 INFO MainThread:3085831 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 11:55:44,224 INFO MainThread:3085831 [wandb_run.py:_restore():1752] restore +2022-07-30 11:55:46,767 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 11:55:46,995 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 11:55:47,817 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 419641 +} + +2022-07-30 11:55:47,919 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 419641 +} + +2022-07-30 11:55:48,020 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,122 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,223 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,325 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,427 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,529 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,630 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:49,112 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:49,472 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} +local_info { +} + +2022-07-30 11:55:51,044 INFO MainThread:3085831 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb b/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b37dd4ad70285d172c690390b73198d978868ea0 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d045fc74b77c5283c71e9b35293393394b98b30c2b916294e2cb576751a00 +size 472289 diff --git a/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_115718-1xckv47v/files/config.yaml b/wandb/run-20220730_115718-1xckv47v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af2d4af0ae6d32909b3c95e30f0f5be508b0001c --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659182238 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_115718-1xckv47v/files/diff.patch b/wandb/run-20220730_115718-1xckv47v/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/output.log b/wandb/run-20220730_115718-1xckv47v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0902f5d44f1c27bf252d41f55c7fbfe0735856ee --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/output.log @@ -0,0 +1,1181 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-57-13_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=20, +per_device_train_batch_size=20, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 73.76it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
380.57it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9040.69ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8753.07ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8857.64ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8329.13ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9201.53ex/s] +removing punctuation 
from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8829.06ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7897.68ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8353.78ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8645.86ex/s] +removing punctuation from train split #4: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8519/9523 [00:01<00:00, 8100.65ex/s] +removing punctuation from train split #4: 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9339/9523 [00:01<00:00, 8040.69ex/s] +removing punctuation from train split #6: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9123/9523 [00:01<00:00, 8476.80ex/s] +removing punctuation from train split #7: 
92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8794/9523 [00:01<00:00, 8218.43ex/s] +removing punctuation from train split #8: 65%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6218/9523 [00:00<00:00, 8866.88ex/s] +removing punctuation from train split #9: 53%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5059/9523 [00:00<00:00, 7901.23ex/s] +removing punctuation from train split #8: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7105/9523 [00:00<00:00, 8853.02ex/s] +removing punctuation from train split #8: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8873/9523 [00:01<00:00, 8367.68ex/s] +removing punctuation from train split #9: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7652/9523 [00:01<00:00, 6570.09ex/s] +removing punctuation from train split #9: 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8368/9523 [00:01<00:00, 6605.89ex/s] +removing punctuation from train split #10: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8766/9523 [00:01<00:00, 8388.55ex/s] +removing punctuation from train split #11: 
91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8647/9523 [00:01<00:00, 8288.42ex/s] +removing punctuation from train split #12: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9060/9522 [00:01<00:00, 9381.60ex/s] +removing punctuation from train split #13: 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7740/9522 [00:00<00:00, 7945.57ex/s] +removing punctuation from train split #14: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6881/9522 [00:00<00:00, 8757.71ex/s] +removing punctuation from train split #15: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6822/9522 [00:00<00:00, 8696.29ex/s] +removing punctuation from train split #16: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6915/9522 [00:00<00:00, 8780.45ex/s] +removing punctuation from train split #17: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████ | 5005/9522 [00:00<00:00, 8535.15ex/s] +removing punctuation from train split #18: 54%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5140/9522 [00:00<00:00, 8692.31ex/s] +removing punctuation from train split #19: 44%|██████████████████████████████████████████████████████████████████████████████████████ | 4183/9522 [00:00<00:00, 8514.79ex/s] 
+removing punctuation from train split #20: 35%|████████████████████████████████████████████████████████████████████▊ | 3342/9522 [00:00<00:00, 8442.54ex/s] +removing punctuation from train split #21: 35%|████████████████████████████████████████████████████████████████████ | 3308/9522 [00:00<00:00, 8375.72ex/s] +removing punctuation from train split #22: 26%|███████████████████████████████████████████████████▉ | 2521/9522 [00:00<00:00, 8489.99ex/s] +removing punctuation from train split #23: 26%|███████████████████████████████████████████████████▋ | 2514/9522 [00:00<00:00, 8496.01ex/s] +removing punctuation from train split #24: 11%|█████████████████████▊ | 1062/9522 [00:00<00:01, 5187.20ex/s] +removing punctuation from train split #25: 17%|██████████████████████████████████▏ | 1658/9522 [00:00<00:00, 8350.20ex/s] +removing punctuation from train split #26: 7%|█████████████▎ | 641/9522 [00:00<00:01, 6401.32ex/s] +removing punctuation from train split #27: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:09:22.954433: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:09:22.954469: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 20 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 160 +INFO:__main__: Total optimization steps = 75640 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 841, in xla_pmap_impl + return compiled_fun(*args) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1656, in 
__call__ + out_bufs = self.xla_executable.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). 
\ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/requirements.txt b/wandb/run-20220730_115718-1xckv47v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 
+pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json b/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4bcd35b5167cbbba5a54b4a189f2d1bc09a0c195 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T11:57:21.565402", + "startedAt": "2022-07-30T11:57:18.095485", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + 
"--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=20", + "--per_device_eval_batch_size=20", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json b/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a7ef6c1b2317e4698b2d4108e7c29c613ad653db --- /dev/null +++ 
b/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1012}} \ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log b/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1bd716e1cbfc7bc75dc3a97b177ca49fffe11498 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log @@ -0,0 +1,576 @@ +2022-07-30 11:57:19,006 INFO MainThread:2100683 [internal.py:wandb_internal():87] W&B internal server running at pid: 2100683, started at: 2022-07-30 11:57:19.006054 +2022-07-30 11:57:19,008 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 11:57:19,008 INFO WriterThread:2100683 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb +2022-07-30 11:57:19,009 DEBUG SenderThread:2100683 [sender.py:send():234] send: header +2022-07-30 11:57:19,009 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: check_version +2022-07-30 11:57:19,060 DEBUG SenderThread:2100683 [sender.py:send():234] send: run +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [sender.py:_start_run_threads():804] run started: 1xckv47v with start time 1659182238 +2022-07-30 11:57:19,249 DEBUG SenderThread:2100683 [sender.py:send():234] send: summary +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:57:19,249 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 11:57:20,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:__init__():40] meta init +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:__init__():54] meta init done +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:probe():214] probe +2022-07-30 11:57:21,566 DEBUG HandlerThread:2100683 [meta.py:_setup_git():204] setup git +2022-07-30 11:57:21,604 DEBUG HandlerThread:2100683 [meta.py:_setup_git():211] setup git done +2022-07-30 11:57:21,604 DEBUG HandlerThread:2100683 [meta.py:_save_code():92] save code +2022-07-30 11:57:21,617 DEBUG HandlerThread:2100683 [meta.py:_save_code():113] save code done +2022-07-30 11:57:21,618 DEBUG HandlerThread:2100683 [meta.py:_save_patches():130] save patches +2022-07-30 11:57:21,695 DEBUG HandlerThread:2100683 [meta.py:_save_patches():172] save patches done +2022-07-30 11:57:21,695 DEBUG HandlerThread:2100683 [meta.py:_save_pip():58] save pip +2022-07-30 11:57:21,696 DEBUG HandlerThread:2100683 [meta.py:_save_pip():72] save pip done +2022-07-30 11:57:21,696 DEBUG HandlerThread:2100683 [meta.py:probe():252] probe done +2022-07-30 11:57:21,699 DEBUG SenderThread:2100683 [sender.py:send():234] send: files +2022-07-30 11:57:21,699 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 11:57:21,699 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 11:57:21,700 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 11:57:21,707 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:21,707 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:22,197 INFO Thread-11 :2100683 [upload_job.py:push():137] Uploaded file 
/tmp/tmpcy1kthhcwandb/3w11fl2a-wandb-metadata.json +2022-07-30 11:57:22,212 INFO Thread-13 :2100683 [upload_job.py:push():137] Uploaded file /tmp/tmpcy1kthhcwandb/3tkinvp2-diff.patch +2022-07-30 11:57:22,268 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/diff.patch +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code +2022-07-30 11:57:22,394 INFO Thread-12 :2100683 [upload_job.py:push():137] Uploaded file /tmp/tmpcy1kthhcwandb/3mlm83yr-code/run_flax_speech_recognition_ctc.py +2022-07-30 11:57:24,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:26,270 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:28,272 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:30,273 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:36,276 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:36,845 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:36,845 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:38,276 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:49,650 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:57:51,984 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:51,985 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:52,282 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:54,283 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:56,284 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:05,287 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:07,119 DEBUG HandlerThread:2100683 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:07,120 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:07,288 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:19,727 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:58:21,294 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:22,254 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:22,255 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:37,414 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:37,414 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:49,805 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:58:52,720 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:52,720 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:04,311 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:06,312 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:07,883 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:07,884 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 11:59:08,313 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:10,314 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:12,315 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:14,316 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:16,317 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:18,318 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:19,880 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:59:20,319 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:22,320 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:23,061 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:23,062 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:24,321 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log 
+2022-07-30 11:59:26,322 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:28,323 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:30,324 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:32,325 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:34,326 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:36,327 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:38,257 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:38,258 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:38,328 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:40,330 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:42,330 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:45,332 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:47,333 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:49,334 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:49,960 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:59:51,335 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:53,336 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:53,397 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:53,398 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:55,337 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:57,338 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:59,342 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:01,340 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:03,341 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:05,343 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:07,345 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:08,546 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:08,546 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:09,346 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:11,347 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:13,348 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:15,349 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:17,350 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:19,351 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:20,032 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:00:21,355 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:23,353 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:23,746 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:23,746 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:25,354 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:27,356 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:29,357 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:31,358 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:33,359 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:35,360 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:37,362 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:38,888 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:38,889 DEBUG SenderThread:2100683 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:39,363 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:41,364 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:43,365 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:45,366 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:47,367 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:49,368 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:50,118 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:00:51,369 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:53,370 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:54,024 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:54,024 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:55,371 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:57,373 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:59,374 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:01,375 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:03,376 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:05,376 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:07,378 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:09,160 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:09,161 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:09,382 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:11,380 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:13,381 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:01:15,382 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:17,384 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:19,384 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:20,193 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:01:21,386 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:23,387 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:24,314 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:24,314 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:25,388 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:27,390 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:29,391 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:31,392 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:01:33,393 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:35,395 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:37,395 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:39,396 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:39,469 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:39,470 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:41,397 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:43,398 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:45,399 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:47,400 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:49,401 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:50,287 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 
12:01:51,403 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:53,404 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:54,608 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:54,608 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:55,405 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:57,406 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:59,407 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:01,408 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:03,410 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:05,411 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:07,412 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:09,413 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:09,753 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:09,754 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:11,414 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:13,415 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:15,416 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:17,417 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:19,418 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:20,379 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:02:21,419 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:23,420 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:24,893 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:24,893 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:25,421 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:27,422 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:29,423 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:31,425 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:33,426 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:35,427 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:37,428 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:39,429 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:40,051 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:40,051 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:41,430 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:43,431 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:45,434 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:47,433 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:49,434 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:50,452 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:02:51,435 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:53,436 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:55,195 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:55,195 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:55,437 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:57,438 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:59,439 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:01,440 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:03,441 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:05,442 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:07,443 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:09,444 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:10,353 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:10,353 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:11,446 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:13,447 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:15,448 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:17,451 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:19,452 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:03:20,524 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:03:21,453 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:23,454 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:25,456 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:25,492 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:25,492 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:27,457 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:29,458 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:32,459 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:34,460 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:36,461 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:38,462 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:03:40,463 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:40,635 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:40,636 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:42,464 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:44,466 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:46,471 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:48,472 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:50,473 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:50,617 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:03:52,474 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:54,477 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:55,772 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:55,773 DEBUG SenderThread:2100683 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 12:03:56,478 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:58,479 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:00,481 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:02,482 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:04,483 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:06,484 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:08,486 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:10,487 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:10,957 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:10,958 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:12,488 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:14,489 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:16,490 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:18,492 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:20,493 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:20,699 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:04:22,494 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:24,495 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:26,109 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:26,110 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:26,496 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:28,497 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:30,498 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:32,499 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:34,500 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:36,501 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:38,502 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:40,503 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:41,254 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:41,255 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:42,504 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:44,506 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:46,507 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:50,783 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:04:56,392 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:56,393 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 
12:05:11,600 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:11,600 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:15,522 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:17,523 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:19,524 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:20,857 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:05:21,525 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:23,527 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:25,528 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:26,847 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:26,852 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:27,529 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:29,530 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:31,531 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:33,532 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:35,533 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:37,534 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:39,535 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:41,536 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:41,996 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:41,997 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:43,537 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:45,538 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:47,539 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:05:49,543 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:50,941 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:05:51,544 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:53,546 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:55,547 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:57,135 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:57,135 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:12,274 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:12,274 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:21,024 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:06:27,503 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:27,503 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:33,565 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:35,566 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:37,567 INFO Thread-8 
:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:39,573 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:41,574 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:42,717 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:42,717 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:43,575 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:45,577 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:47,578 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:49,579 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:51,101 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:06:51,580 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:53,581 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:55,582 INFO Thread-8 
:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:57,583 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:57,855 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:57,856 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:59,584 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:01,585 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:03,586 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:07,588 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:09,589 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:11,590 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:13,004 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:13,005 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:13,591 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:15,593 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:17,594 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:19,595 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:21,189 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:07:21,597 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:23,598 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:25,599 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:27,600 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:28,147 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:28,147 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:29,601 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:31,602 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:33,603 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:36,605 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:38,606 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:40,607 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:42,608 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:43,294 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:43,294 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:44,609 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:46,610 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:48,611 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:50,612 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:51,271 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:07:52,613 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:54,614 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:56,615 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:58,432 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:58,432 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:13,567 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:13,567 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:21,343 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:08:28,719 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:28,719 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:40,633 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:44,070 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:44,070 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:48,636 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:51,414 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:08:57,640 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:59,415 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:59,416 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:06,644 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:14,648 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:14,822 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:14,822 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:21,485 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:09:23,652 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:29,654 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:30,071 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:30,072 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:31,655 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:33,656 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:42,660 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:45,442 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:45,443 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:51,559 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:10:00,687 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:00,687 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:15,852 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:15,852 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:20,675 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:21,635 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:10:28,679 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:31,135 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:31,136 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:31,680 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:46,300 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:46,300 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:51,711 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:11:01,449 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:01,450 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:16,905 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:16,905 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:21,790 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:11:32,041 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:32,041 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:47,175 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:47,176 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:51,866 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:12:02,312 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:02,313 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:17,454 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:17,454 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:21,945 DEBUG SenderThread:2100683 [sender.py:send():234] send: 
stats +2022-07-30 12:12:32,586 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:32,586 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:47,720 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:47,720 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:52,021 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:13:02,858 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:02,858 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:18,016 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:18,016 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:22,094 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:13:33,162 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:33,162 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:48,301 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:48,302 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:52,166 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:14:03,438 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:14:03,438 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:14:10,811 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:12,147 DEBUG SenderThread:2100683 [sender.py:send():234] send: telemetry +2022-07-30 12:14:12,147 DEBUG SenderThread:2100683 [sender.py:send():234] send: exit +2022-07-30 12:14:12,147 INFO SenderThread:2100683 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 12:14:12,148 INFO SenderThread:2100683 [sender.py:send_exit():368] handling runtime: 1012 +2022-07-30 12:14:12,148 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,149 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:14:12,149 INFO SenderThread:2100683 [sender.py:send_exit():374] send defer +2022-07-30 12:14:12,149 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,150 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,150 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 12:14:12,150 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,150 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 12:14:12,150 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 1 +2022-07-30 12:14:12,151 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,151 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 12:14:12,215 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,215 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 12:14:12,215 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 2 +2022-07-30 12:14:12,215 DEBUG 
SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:14:12,216 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,216 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 12:14:12,216 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,216 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 12:14:12,216 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 3 +2022-07-30 12:14:12,216 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,216 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 12:14:12,216 DEBUG SenderThread:2100683 [sender.py:send():234] send: summary +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:14:12,217 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 4 +2022-07-30 12:14:12,217 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,217 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 12:14:12,217 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 12:14:12,253 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,377 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 5 +2022-07-30 
12:14:12,377 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,377 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,377 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 12:14:12,378 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,378 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 12:14:12,378 INFO SenderThread:2100683 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 12:14:12,479 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,812 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml config.yaml +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/diff.patch diff.patch +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt requirements.txt +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log output.log +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json wandb-summary.json +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json wandb-metadata.json +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 6 +2022-07-30 12:14:12,814 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,816 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,816 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 12:14:12,819 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,819 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 12:14:12,819 INFO SenderThread:2100683 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:14:12,916 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,917 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,019 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit 
+2022-07-30 12:14:13,019 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,121 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,121 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,223 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,223 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,295 INFO Thread-15 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt +2022-07-30 12:14:13,325 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,325 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,330 INFO Thread-17 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 12:14:13,335 INFO Thread-14 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml +2022-07-30 12:14:13,426 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,427 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,503 INFO Thread-16 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:13,528 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,528 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,630 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-30 12:14:13,630 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,704 INFO Thread-7 :2100683 [sender.py:transition_state():387] send defer: 7 +2022-07-30 12:14:13,705 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:13,705 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 12:14:13,705 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:13,705 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 12:14:13,732 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:14,143 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 8 +2022-07-30 12:14:14,143 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:14,144 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:14,144 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 12:14:14,144 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:14,144 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 12:14:14,144 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 9 +2022-07-30 12:14:14,144 DEBUG SenderThread:2100683 [sender.py:send():234] send: final +2022-07-30 12:14:14,145 DEBUG SenderThread:2100683 [sender.py:send():234] send: footer +2022-07-30 12:14:14,145 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:14,145 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 12:14:14,145 DEBUG SenderThread:2100683 [sender.py:send_request():248] 
send_request: defer +2022-07-30 12:14:14,145 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 12:14:14,245 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:14,246 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:14,246 INFO SenderThread:2100683 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:14:14,513 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 12:14:14,514 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 12:14:14,515 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 12:14:14,515 INFO HandlerThread:2100683 [handler.py:finish():731] shutting down handler +2022-07-30 12:14:15,145 INFO WriterThread:2100683 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [sender.py:finish():1070] shutting down sender +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:14:15,516 INFO MainThread:2100683 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_115718-1xckv47v/logs/debug.log b/wandb/run-20220730_115718-1xckv47v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3a5ea67cda0ea061f444b2c2b3936eeb580fd16e --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 11:57:18,097 INFO MainThread:2099342 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/logs/debug.log +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():404] calling init triggers +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():460] starting backend +2022-07-30 11:57:18,097 INFO MainThread:2099342 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 11:57:18,155 INFO MainThread:2099342 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 11:57:18,200 INFO MainThread:2099342 [backend.py:ensure_launched():221] started backend process with pid: 2100683 +2022-07-30 11:57:18,202 INFO MainThread:2099342 [wandb_init.py:init():469] backend started and connected +2022-07-30 11:57:18,218 INFO MainThread:2099342 [wandb_init.py:init():533] updated telemetry +2022-07-30 11:57:18,329 INFO MainThread:2099342 [wandb_init.py:init():563] communicating current version +2022-07-30 11:57:19,059 INFO MainThread:2099342 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 11:57:19,059 INFO MainThread:2099342 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 11:57:19,249 INFO MainThread:2099342 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 11:57:21,703 INFO MainThread:2099342 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 11:57:21,703 INFO MainThread:2099342 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 11:57:21,704 INFO MainThread:2099342 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 11:57:21,706 INFO MainThread:2099342 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 11:57:21,706 INFO MainThread:2099342 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 12:14:09,859 INFO MainThread:2099342 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 12:14:09,864 INFO MainThread:2099342 [wandb_run.py:_restore():1752] restore +2022-07-30 12:14:12,151 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:14:12,378 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:14:12,815 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 352530 +} + +2022-07-30 12:14:12,917 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 352530 +} + +2022-07-30 12:14:13,020 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,122 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,224 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,325 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,427 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,529 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,631 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:14,144 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:14,512 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} +local_info { +} + +2022-07-30 12:14:16,128 INFO MainThread:2099342 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb b/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f723f166353ea5a6b6289dca3f792c47d7ff34a6 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f90f5516ba68c809b54d5aed385c1e6a47f9106bde8abf07065bde01cf1e335 +size 409339 diff --git a/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_122457-1iypf07q/files/config.yaml b/wandb/run-20220730_122457-1iypf07q/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa1ea04d345a61a265eace828abf4596b653070e --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659183897 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_122457-1iypf07q/files/diff.patch b/wandb/run-20220730_122457-1iypf07q/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/output.log b/wandb/run-20220730_122457-1iypf07q/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9e568a88c541956990cb1195ed4e86fcb67e0313 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/output.log @@ -0,0 +1,1247 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_12-24-49_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=20, +per_device_train_batch_size=20, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00, 3.86s/it] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00, 
2.09it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'kernel'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 0%|▌ | 29/9523 [00:01<06:34, 24.05ex/s] +removing punctuation from train split #1: 0%|▋ | 34/9523 [00:01<05:14, 30.21ex/s] +removing punctuation from train split #2: 0%|▌ | 28/9523 [00:01<07:10, 22.05ex/s] +removing punctuation from train split #3: 0%|▌ | 26/9523 [00:00<04:36, 34.30ex/s] +removing punctuation from train split #4: 0%|▌ | 25/9523 [00:00<04:38, 34.12ex/s] +removing punctuation from train split #5: 0%|▍ | 20/9523 [00:00<05:22, 29.45ex/s] +removing punctuation from train split #6: 0%|▍ | 23/9523 [00:00<04:59, 31.74ex/s] +removing punctuation from train split #7: 0%|▍ | 18/9523 [00:00<05:22, 29.46ex/s] +removing punctuation from train split #8: 0%|▎ | 16/9523 [00:00<05:15, 30.18ex/s] +removing punctuation from train split #9: 0%|▎ | 17/9523 [00:00<05:26, 29.09ex/s] +removing punctuation from train split #10: 0%|▍ | 21/9523 [00:00<03:54, 40.58ex/s] +removing punctuation from train split #11: 0%|▎ | 14/9523 [00:00<05:20, 29.64ex/s] +removing punctuation from train split #12: 0%|▏ | 9/9522 [00:00<06:47, 23.37ex/s] +removing punctuation from train split #13: 0%|▎ | 12/9522 [00:00<05:13, 30.37ex/s] +removing punctuation from train split #14: 0%|▎ | 15/9522 [00:00<03:27, 45.77ex/s] +removing punctuation from train split #15: 0%|▏ | 9/9522 [00:00<04:15, 37.30ex/s] +removing punctuation from train split #16: 0%|▏ | 8/9522 [00:00<04:11, 37.86ex/s] +removing punctuation from 
train split #17: 0%|▏ | 7/9522 [00:00<04:49, 32.82ex/s] +removing punctuation from train split #18: 0%| | 4/9522 [00:00<04:04, 38.90ex/s] +removing punctuation from train split #19: 0%| | 3/9522 [00:00<07:51, 20.21ex/s] +removing punctuation from train split #20: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:37:31.652469: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:37:31.652528: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 20 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 160 +INFO:__main__: Total optimization steps = 75640 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 841, in xla_pmap_impl + return compiled_fun(*args) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1656, in __call__ + out_bufs = self.xla_executable.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. 
The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). 
\ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/requirements.txt b/wandb/run-20220730_122457-1iypf07q/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 
+pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json b/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..caee519277882b61ef54aff75439348abe026bd7 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T12:25:01.338750", + "startedAt": "2022-07-30T12:24:57.754509", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + 
"--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=20", + "--per_device_eval_batch_size=20", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json b/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..894a2464300a4d110d2d70632e4ce2598fa9d6a3 --- /dev/null +++ 
b/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1026}} \ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log b/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..44835b6950128a4427f042e6875c66e453d84a1a --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log @@ -0,0 +1,584 @@ +2022-07-30 12:24:58,729 INFO MainThread:6773 [internal.py:wandb_internal():87] W&B internal server running at pid: 6773, started at: 2022-07-30 12:24:58.729025 +2022-07-30 12:24:58,730 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 12:24:58,731 INFO WriterThread:6773 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb +2022-07-30 12:24:58,732 DEBUG SenderThread:6773 [sender.py:send():234] send: header +2022-07-30 12:24:58,732 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: check_version +2022-07-30 12:24:58,769 DEBUG SenderThread:6773 [sender.py:send():234] send: run +2022-07-30 12:24:58,965 INFO SenderThread:6773 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files +2022-07-30 12:24:58,965 INFO SenderThread:6773 [sender.py:_start_run_threads():804] run started: 1iypf07q with start time 1659183897 +2022-07-30 12:24:58,965 DEBUG SenderThread:6773 [sender.py:send():234] send: summary +2022-07-30 12:24:58,965 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:24:58,966 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 12:24:59,969 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:__init__():40] meta init +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:__init__():54] meta init done +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:probe():214] probe +2022-07-30 12:25:01,339 DEBUG HandlerThread:6773 [meta.py:_setup_git():204] setup git +2022-07-30 12:25:01,369 DEBUG HandlerThread:6773 [meta.py:_setup_git():211] setup git done +2022-07-30 12:25:01,369 DEBUG HandlerThread:6773 [meta.py:_save_code():92] save code +2022-07-30 12:25:01,380 DEBUG HandlerThread:6773 [meta.py:_save_code():113] save code done +2022-07-30 12:25:01,380 DEBUG HandlerThread:6773 [meta.py:_save_patches():130] save patches +2022-07-30 12:25:01,488 DEBUG HandlerThread:6773 [meta.py:_save_patches():172] save patches done +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:_save_pip():58] save pip +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:_save_pip():72] save pip done +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:probe():252] probe done +2022-07-30 12:25:01,536 DEBUG SenderThread:6773 [sender.py:send():234] send: files +2022-07-30 12:25:01,536 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 12:25:01,536 INFO SenderThread:6773 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 12:25:01,537 INFO SenderThread:6773 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 12:25:01,542 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:01,543 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:01,994 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/diff.patch +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code +2022-07-30 12:25:02,018 INFO Thread-13 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/3oeo3rpo-diff.patch +2022-07-30 12:25:02,045 INFO Thread-11 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/110yn7af-wandb-metadata.json +2022-07-30 12:25:02,282 INFO Thread-12 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/2cmn72j4-code/run_flax_speech_recognition_ctc.py +2022-07-30 12:25:03,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:05,996 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:11,998 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:13,999 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:16,000 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:16,686 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:16,687 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:18,001 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:20,002 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:22,003 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:24,003 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:26,004 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:28,005 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:29,416 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:25:30,006 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:31,818 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:31,818 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:32,007 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:34,008 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:42,011 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:46,955 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:46,955 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:58,017 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:59,489 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:26:02,091 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:02,092 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:17,226 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:17,226 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:29,563 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:26:32,031 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:32,381 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:32,382 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:39,034 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:41,035 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:43,036 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:45,037 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:47,038 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:47,547 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:47,547 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:49,038 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:51,039 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:53,040 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:26:55,041 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:57,042 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:59,043 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:59,634 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:27:02,685 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:02,686 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:11,048 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:13,049 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:15,050 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:17,826 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:17,826 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:29,705 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:27:32,976 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:32,977 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:48,225 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:27:48,225 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:51,066 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:53,067 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:55,068 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:57,069 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:59,070 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:59,777 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:28:01,071 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:03,072 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:03,369 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:03,370 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:05,073 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:07,074 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:09,074 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:11,075 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:13,077 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:16,078 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:18,079 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:18,515 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:18,515 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:20,080 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:22,080 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:24,081 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:26,082 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:28,083 
INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:29,851 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:28:30,084 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:32,085 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:33,651 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:33,652 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:34,087 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:36,088 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:38,089 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:40,089 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:42,090 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:44,091 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:46,092 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:48,093 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:48,789 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:48,789 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:50,094 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:52,095 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:54,096 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:56,097 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:58,098 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:59,924 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:29:00,099 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:02,100 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:03,947 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:29:03,947 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:04,101 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:06,102 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:08,103 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:10,105 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:12,106 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:14,107 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:16,108 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:18,109 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:19,085 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:19,085 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:20,110 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:22,111 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:24,112 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:26,113 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:28,114 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:29,999 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:29:30,115 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:32,117 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:34,118 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:34,227 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:34,227 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:36,119 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:38,121 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:40,122 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:42,123 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:44,124 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:46,125 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:48,127 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:49,373 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:49,373 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:50,128 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:52,129 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:54,130 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:56,131 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:58,132 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:00,080 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:30:00,134 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:02,135 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:04,136 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:04,514 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:04,515 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:06,137 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:08,138 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:10,139 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:12,141 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:14,142 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:16,143 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:18,144 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:19,689 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:19,689 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:20,145 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:22,146 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:24,148 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:26,149 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:28,150 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:30,151 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:30,155 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:30:32,153 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:34,154 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:34,827 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:34,827 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:36,155 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:38,156 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:40,157 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:42,158 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:44,159 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:46,161 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:48,162 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:49,979 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:49,980 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:50,165 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:30:52,164 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:54,165 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:56,166 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:58,168 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:00,169 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:00,236 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:31:02,170 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:04,172 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:05,139 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:05,139 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:06,173 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:08,174 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:10,175 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:12,176 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:14,177 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:16,179 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:18,180 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:20,181 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:20,283 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:20,283 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:22,182 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:24,183 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:26,184 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:28,185 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:30,186 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:30,317 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:31:32,187 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:34,188 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:35,447 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:35,447 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:36,189 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:38,190 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:40,191 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:42,192 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:44,194 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:47,195 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:49,196 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:50,586 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:50,587 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:51,198 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:53,199 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:55,200 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:57,202 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:59,203 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:00,399 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:32:01,204 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:03,205 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:05,206 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:05,722 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:05,722 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:07,207 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:09,209 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:11,210 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:13,212 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:15,213 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:17,215 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:19,215 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:20,859 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:20,859 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:21,217 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:32:23,218 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:25,219 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:27,220 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:29,221 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:30,481 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:32:31,221 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:33,222 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:35,223 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:36,013 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:36,014 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:37,229 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:39,230 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:41,231 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:43,232 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:45,234 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:47,235 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:49,236 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:51,168 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:51,168 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:51,237 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:53,238 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:55,238 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:57,239 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:59,241 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:00,557 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:33:01,241 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:03,242 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:06,308 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:06,308 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:21,443 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:21,443 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:30,634 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:33:36,577 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:36,578 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:51,716 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:51,716 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:53,261 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:55,263 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:57,264 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:59,265 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:00,709 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:34:01,266 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:03,267 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:05,268 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:06,865 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:06,865 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:07,269 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:09,270 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:11,271 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:13,272 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:15,273 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:17,274 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:19,275 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:21,276 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:22,007 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:22,007 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:23,277 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:27,279 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:29,280 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:30,782 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:34:31,281 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:33,281 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:35,282 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:37,139 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:37,140 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:52,288 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:52,288 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:00,856 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:35:07,441 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:07,442 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:09,297 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:11,298 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:13,299 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:15,300 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:17,300 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:19,301 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:21,302 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:22,661 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:22,662 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:23,303 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:25,304 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:27,305 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:29,306 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:30,934 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:35:31,307 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:33,308 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:35,309 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:37,309 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:37,819 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:35:37,820 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:39,311 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:41,312 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:43,313 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:45,314 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:47,315 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:49,315 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:51,316 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:52,958 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:52,958 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:53,317 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:55,318 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:57,319 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:59,320 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:01,018 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:36:01,321 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:03,322 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:05,323 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:07,324 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:08,111 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:08,112 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:09,326 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:11,326 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:13,327 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:15,328 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:17,329 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:19,330 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:21,331 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:23,261 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:23,262 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:23,333 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:25,333 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:27,334 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:30,335 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:31,093 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:36:32,336 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:38,404 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:38,404 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:53,537 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:53,537 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:01,177 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:37:08,689 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:08,690 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:14,353 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:18,355 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:22,357 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:23,948 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:23,948 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:26,358 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:30,360 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:31,256 DEBUG 
SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:37:32,361 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:36,363 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:38,364 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:39,151 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:39,151 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:41,365 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:47,368 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:54,451 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:54,451 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:01,333 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:38:09,730 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:09,730 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:24,900 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:24,900 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:26,384 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:31,412 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:38:34,387 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:36,388 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:40,067 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:40,067 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:55,237 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:55,238 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:01,491 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:39:10,390 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:10,390 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:25,522 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:25,523 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:31,568 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:39:40,661 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:40,661 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:55,794 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:55,795 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:01,642 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:40:10,937 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:10,937 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:26,071 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:26,071 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:31,715 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:40:41,207 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:41,207 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:56,343 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:56,344 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:01,785 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:41:11,484 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:11,484 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:26,638 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:26,639 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:31,856 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:41:41,777 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:41,777 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:56,911 DEBUG HandlerThread:6773 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:56,911 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:42:01,934 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:42:03,472 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:05,135 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,135 DEBUG SenderThread:6773 [sender.py:send():234] send: telemetry +2022-07-30 12:42:05,135 DEBUG SenderThread:6773 [sender.py:send():234] send: exit +2022-07-30 12:42:05,135 INFO SenderThread:6773 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:send_exit():368] handling runtime: 1026 +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:send_exit():374] send defer +2022-07-30 12:42:05,136 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,137 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,137 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 12:42:05,138 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,138 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 12:42:05,138 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 1 +2022-07-30 12:42:05,138 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,138 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 12:42:05,142 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,142 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 12:42:05,142 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 2 +2022-07-30 12:42:05,142 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:42:05,143 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,143 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 12:42:05,143 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,143 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 12:42:05,143 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 3 +2022-07-30 12:42:05,143 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,143 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 12:42:05,143 DEBUG SenderThread:6773 [sender.py:send():234] send: summary +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:42:05,144 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 4 +2022-07-30 12:42:05,144 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,144 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 12:42:05,144 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:send_request_defer():383] 
handle sender defer: 4 +2022-07-30 12:42:05,239 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,303 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 5 +2022-07-30 12:42:05,303 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,303 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,303 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 12:42:05,304 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,304 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 12:42:05,304 INFO SenderThread:6773 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 12:42:05,404 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,472 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml config.yaml +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/diff.patch diff.patch +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt requirements.txt +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log output.log +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json wandb-summary.json +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json wandb-metadata.json +2022-07-30 12:42:05,477 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 12:42:05,477 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 6 +2022-07-30 12:42:05,477 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,478 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,480 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 12:42:05,483 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,484 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 12:42:05,484 INFO SenderThread:6773 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:42:05,582 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,582 DEBUG SenderThread:6773 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,683 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,683 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,785 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,785 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,886 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,886 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,932 INFO Thread-17 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:42:05,943 INFO Thread-14 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml +2022-07-30 12:42:05,963 INFO Thread-15 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt +2022-07-30 12:42:05,987 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,988 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,089 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,089 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,154 INFO Thread-16 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:06,190 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,191 DEBUG SenderThread:6773 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-30 12:42:06,292 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,292 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,355 INFO Thread-7 :6773 [sender.py:transition_state():387] send defer: 7 +2022-07-30 12:42:06,356 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,356 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 12:42:06,356 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,356 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 12:42:06,394 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,763 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 8 +2022-07-30 12:42:06,763 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,764 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,764 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 12:42:06,764 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,764 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 12:42:06,764 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 9 +2022-07-30 12:42:06,765 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,765 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 12:42:06,765 DEBUG SenderThread:6773 [sender.py:send():234] send: final +2022-07-30 12:42:06,765 DEBUG SenderThread:6773 [sender.py:send():234] send: footer +2022-07-30 12:42:06,765 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,765 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 12:42:06,865 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,865 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,865 INFO SenderThread:6773 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:42:07,121 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 12:42:07,122 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 12:42:07,123 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 12:42:07,123 INFO HandlerThread:6773 [handler.py:finish():731] shutting down handler +2022-07-30 12:42:07,765 INFO WriterThread:6773 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb +2022-07-30 12:42:08,120 INFO SenderThread:6773 [sender.py:finish():1070] shutting down sender +2022-07-30 12:42:08,120 INFO SenderThread:6773 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:42:08,120 INFO SenderThread:6773 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:42:08,123 INFO MainThread:6773 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_122457-1iypf07q/logs/debug.log b/wandb/run-20220730_122457-1iypf07q/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..376298cce4720df6df3f79b173e869941adc502a --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 12:24:57,797 INFO MainThread:4852 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/logs/debug.log +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():404] calling init triggers +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():460] starting backend +2022-07-30 12:24:57,798 INFO MainThread:4852 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 12:24:57,868 INFO MainThread:4852 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 12:24:57,894 INFO MainThread:4852 [backend.py:ensure_launched():221] started backend process with pid: 6773 +2022-07-30 12:24:57,896 INFO MainThread:4852 [wandb_init.py:init():469] backend started and connected +2022-07-30 12:24:57,909 INFO MainThread:4852 [wandb_init.py:init():533] updated telemetry +2022-07-30 12:24:57,975 INFO MainThread:4852 [wandb_init.py:init():563] communicating current version +2022-07-30 12:24:58,768 INFO MainThread:4852 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 12:24:58,768 INFO MainThread:4852 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 12:24:58,965 INFO MainThread:4852 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 12:25:01,539 INFO MainThread:4852 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 12:25:01,540 INFO MainThread:4852 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 12:25:01,541 INFO MainThread:4852 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 12:25:01,543 INFO MainThread:4852 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 12:25:01,543 INFO MainThread:4852 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 12:42:02,970 INFO MainThread:4852 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 12:42:02,975 INFO MainThread:4852 [wandb_run.py:_restore():1752] restore +2022-07-30 12:42:05,137 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:42:05,303 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:42:05,481 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 336868 +} + +2022-07-30 12:42:05,582 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 336868 +} + +2022-07-30 12:42:05,684 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,785 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,887 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,988 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,089 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,191 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,293 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,764 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:07,120 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} +local_info { +} + +2022-07-30 12:42:10,566 INFO MainThread:4852 [wandb_run.py:_append_files():2180] logging synced files diff --git 
a/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb b/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb new file mode 100644 index 0000000000000000000000000000000000000000..09be673e3622b0ed1eb7b9b9496b1d01a0cf630e --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df3eec25c410cbde4963b6493592cf98914d7abc365c2e88e74249bcc895a9b +size 386029 diff --git a/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_124505-101ubxa3/files/config.yaml b/wandb/run-20220730_124505-101ubxa3/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6c985261e2400770199c7090235b75fc0cad92f --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659185105 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_124505-101ubxa3/files/diff.patch b/wandb/run-20220730_124505-101ubxa3/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/output.log b/wandb/run-20220730_124505-101ubxa3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..746cee0d5a85642b26e86bbd49088b3816e54454 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/output.log @@ -0,0 +1,1372 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_12-45-01_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=16, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.78it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
440.49it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'kernel'), ('project_hid', 'bias'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 8%|████████████████▎ | 784/9523 [00:00<00:01, 7838.49ex/s] +removing punctuation from train split #1: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:56:30.042939: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:56:30.042991: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 16 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 128 +INFO:__main__: Total optimization steps = 94560 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +ValueError: RESOURCE_EXHAUSTED: Attempting to reserve 5.81G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 5.56G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 5.56G from the bottom with size 19.53M.: while running replica 1 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/requirements.txt b/wandb/run-20220730_124505-101ubxa3/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 
+google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 
+typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json b/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8d31bf5a3841b1f7cf1d6e8c36cbfc3db00c9dce --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T12:45:09.201824", + "startedAt": "2022-07-30T12:45:05.740121", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + 
"--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json b/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..39cfa6cf1cd3578c40691bd1018fb22dbebc7bef --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/grad_norm": 8.5, "layer_grad_norm/": {"lm_head": {"bias": 0.2890625, "kernel": 5.5}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.21875, "scale": 0.20703125}, "layers": {"0": {"attention": {"k_proj": {"bias": 6.4849853515625e-05, "kernel": 0.0556640625}, "out_proj": {"bias": 0.017578125, "kernel": 0.1708984375}, "q_proj": {"bias": 0.0052490234375, "kernel": 0.07373046875}, "v_proj": {"bias": 0.01458740234375, "kernel": 0.12890625}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.263671875}, "output_dense": {"bias": 0.01055908203125, "kernel": 0.224609375}}, "final_layer_norm": {"bias": 0.0478515625, "scale": 0.0625}, "layer_norm": {"bias": 0.03271484375, "scale": 0.083984375}}, "1": {"attention": {"k_proj": {"bias": 3.62396240234375e-05, "kernel": 0.018310546875}, "out_proj": {"bias": 0.0123291015625, "kernel": 0.130859375}, "q_proj": {"bias": 0.001708984375, "kernel": 0.021240234375}, "v_proj": {"bias": 0.0169677734375, "kernel": 0.11083984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0130615234375, "kernel": 0.189453125}, "output_dense": {"bias": 
0.0111083984375, "kernel": 0.1689453125}}, "final_layer_norm": {"bias": 0.0242919921875, "scale": 0.03125}, "layer_norm": {"bias": 0.026123046875, "scale": 0.0283203125}}, "10": {"attention": {"k_proj": {"bias": 1.4543533325195312e-05, "kernel": 0.033203125}, "out_proj": {"bias": 0.0115966796875, "kernel": 0.1015625}, "q_proj": {"bias": 0.0024566650390625, "kernel": 0.03759765625}, "v_proj": {"bias": 0.017333984375, "kernel": 0.134765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.012451171875, "kernel": 0.162109375}, "output_dense": {"bias": 0.0101318359375, "kernel": 0.12890625}}, "final_layer_norm": {"bias": 0.0224609375, "scale": 0.0230712890625}, "layer_norm": {"bias": 0.03125, "scale": 0.0203857421875}}, "11": {"attention": {"k_proj": {"bias": 1.33514404296875e-05, "kernel": 0.02783203125}, "out_proj": {"bias": 0.01080322265625, "kernel": 0.1064453125}, "q_proj": {"bias": 0.001983642578125, "kernel": 0.029541015625}, "v_proj": {"bias": 0.0152587890625, "kernel": 0.1328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.01031494140625, "kernel": 0.1416015625}, "output_dense": {"bias": 0.0093994140625, "kernel": 0.11572265625}}, "final_layer_norm": {"bias": 0.02099609375, "scale": 0.017333984375}, "layer_norm": {"bias": 0.02490234375, "scale": 0.0133056640625}}, "12": {"attention": {"k_proj": {"bias": 1.2159347534179688e-05, "kernel": 0.0286865234375}, "out_proj": {"bias": 0.01019287109375, "kernel": 0.0908203125}, "q_proj": {"bias": 0.001678466796875, "kernel": 0.0289306640625}, "v_proj": {"bias": 0.01239013671875, "kernel": 0.1064453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00860595703125, "kernel": 0.109375}, "output_dense": {"bias": 0.00982666015625, "kernel": 0.10888671875}}, "final_layer_norm": {"bias": 0.01507568359375, "scale": 0.0189208984375}, "layer_norm": {"bias": 0.0185546875, "scale": 0.013916015625}}, "13": {"attention": {"k_proj": {"bias": 1.2636184692382812e-05, "kernel": 0.03173828125}, "out_proj": {"bias": 
0.0108642578125, "kernel": 0.10498046875}, "q_proj": {"bias": 0.0025634765625, "kernel": 0.03515625}, "v_proj": {"bias": 0.01397705078125, "kernel": 0.12158203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00927734375, "kernel": 0.11279296875}, "output_dense": {"bias": 0.010009765625, "kernel": 0.11376953125}}, "final_layer_norm": {"bias": 0.017822265625, "scale": 0.02294921875}, "layer_norm": {"bias": 0.021240234375, "scale": 0.031494140625}}, "14": {"attention": {"k_proj": {"bias": 1.3113021850585938e-05, "kernel": 0.0255126953125}, "out_proj": {"bias": 0.010009765625, "kernel": 0.0966796875}, "q_proj": {"bias": 0.0020751953125, "kernel": 0.02587890625}, "v_proj": {"bias": 0.0107421875, "kernel": 0.09716796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0081787109375, "kernel": 0.1064453125}, "output_dense": {"bias": 0.0091552734375, "kernel": 0.1123046875}}, "final_layer_norm": {"bias": 0.0191650390625, "scale": 0.01336669921875}, "layer_norm": {"bias": 0.0162353515625, "scale": 0.024169921875}}, "15": {"attention": {"k_proj": {"bias": 8.404254913330078e-06, "kernel": 0.0185546875}, "out_proj": {"bias": 0.00958251953125, "kernel": 0.09375}, "q_proj": {"bias": 0.0011749267578125, "kernel": 0.01708984375}, "v_proj": {"bias": 0.00982666015625, "kernel": 0.0849609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.006317138671875, "kernel": 0.07470703125}, "output_dense": {"bias": 0.0096435546875, "kernel": 0.10205078125}}, "final_layer_norm": {"bias": 0.010986328125, "scale": 0.0125732421875}, "layer_norm": {"bias": 0.012939453125, "scale": 0.009033203125}}, "16": {"attention": {"k_proj": {"bias": 5.990266799926758e-06, "kernel": 0.017578125}, "out_proj": {"bias": 0.00982666015625, "kernel": 0.078125}, "q_proj": {"bias": 0.00110626220703125, "kernel": 0.016357421875}, "v_proj": {"bias": 0.00982666015625, "kernel": 0.0791015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00543212890625, "kernel": 0.064453125}, "output_dense": 
{"bias": 0.009765625, "kernel": 0.0986328125}}, "final_layer_norm": {"bias": 0.00927734375, "scale": 0.013671875}, "layer_norm": {"bias": 0.01416015625, "scale": 0.0125732421875}}, "17": {"attention": {"k_proj": {"bias": 8.344650268554688e-06, "kernel": 0.019287109375}, "out_proj": {"bias": 0.01043701171875, "kernel": 0.0869140625}, "q_proj": {"bias": 0.001495361328125, "kernel": 0.0194091796875}, "v_proj": {"bias": 0.0108642578125, "kernel": 0.08544921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00531005859375, "kernel": 0.06005859375}, "output_dense": {"bias": 0.0106201171875, "kernel": 0.1025390625}}, "final_layer_norm": {"bias": 0.009033203125, "scale": 0.0107421875}, "layer_norm": {"bias": 0.015625, "scale": 0.013671875}}, "18": {"attention": {"k_proj": {"bias": 5.930662155151367e-06, "kernel": 0.015869140625}, "out_proj": {"bias": 0.0107421875, "kernel": 0.08154296875}, "q_proj": {"bias": 0.00109100341796875, "kernel": 0.01434326171875}, "v_proj": {"bias": 0.01080322265625, "kernel": 0.0830078125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00482177734375, "kernel": 0.05517578125}, "output_dense": {"bias": 0.0108642578125, "kernel": 0.1064453125}}, "final_layer_norm": {"bias": 0.0079345703125, "scale": 0.0074462890625}, "layer_norm": {"bias": 0.01513671875, "scale": 0.017578125}}, "19": {"attention": {"k_proj": {"bias": 4.470348358154297e-06, "kernel": 0.0128173828125}, "out_proj": {"bias": 0.0113525390625, "kernel": 0.07763671875}, "q_proj": {"bias": 0.00091552734375, "kernel": 0.01092529296875}, "v_proj": {"bias": 0.0115966796875, "kernel": 0.08203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.004638671875, "kernel": 0.05224609375}, "output_dense": {"bias": 0.011474609375, "kernel": 0.111328125}}, "final_layer_norm": {"bias": 0.0074462890625, "scale": 0.0106201171875}, "layer_norm": {"bias": 0.015625, "scale": 0.0133056640625}}, "2": {"attention": {"k_proj": {"bias": 3.528594970703125e-05, "kernel": 0.024169921875}, "out_proj": 
{"bias": 0.013427734375, "kernel": 0.1396484375}, "q_proj": {"bias": 0.001983642578125, "kernel": 0.0283203125}, "v_proj": {"bias": 0.0186767578125, "kernel": 0.1484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01422119140625, "kernel": 0.23046875}, "output_dense": {"bias": 0.0123291015625, "kernel": 0.17578125}}, "final_layer_norm": {"bias": 0.025146484375, "scale": 0.029541015625}, "layer_norm": {"bias": 0.0262451171875, "scale": 0.01904296875}}, "20": {"attention": {"k_proj": {"bias": 3.1888484954833984e-06, "kernel": 0.008056640625}, "out_proj": {"bias": 0.01214599609375, "kernel": 0.07080078125}, "q_proj": {"bias": 0.00057220458984375, "kernel": 0.00665283203125}, "v_proj": {"bias": 0.01220703125, "kernel": 0.0810546875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0048828125, "kernel": 0.0517578125}, "output_dense": {"bias": 0.01220703125, "kernel": 0.119140625}}, "final_layer_norm": {"bias": 0.007720947265625, "scale": 0.008544921875}, "layer_norm": {"bias": 0.0169677734375, "scale": 0.010986328125}}, "21": {"attention": {"k_proj": {"bias": 3.6954879760742188e-06, "kernel": 0.00921630859375}, "out_proj": {"bias": 0.0128173828125, "kernel": 0.0859375}, "q_proj": {"bias": 0.00066375732421875, "kernel": 0.00799560546875}, "v_proj": {"bias": 0.01300048828125, "kernel": 0.091796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00518798828125, "kernel": 0.056396484375}, "output_dense": {"bias": 0.01300048828125, "kernel": 0.12890625}}, "final_layer_norm": {"bias": 0.008544921875, "scale": 0.010009765625}, "layer_norm": {"bias": 0.0169677734375, "scale": 0.01287841796875}}, "22": {"attention": {"k_proj": {"bias": 5.841255187988281e-06, "kernel": 0.01361083984375}, "out_proj": {"bias": 0.013916015625, "kernel": 0.09375}, "q_proj": {"bias": 0.001373291015625, "kernel": 0.012939453125}, "v_proj": {"bias": 0.01409912109375, "kernel": 0.09765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.006195068359375, "kernel": 0.064453125}, 
"output_dense": {"bias": 0.01416015625, "kernel": 0.140625}}, "final_layer_norm": {"bias": 0.0101318359375, "scale": 0.0244140625}, "layer_norm": {"bias": 0.019775390625, "scale": 0.02294921875}}, "23": {"attention": {"k_proj": {"bias": 7.987022399902344e-06, "kernel": 0.01544189453125}, "out_proj": {"bias": 0.01513671875, "kernel": 0.1240234375}, "q_proj": {"bias": 0.00118255615234375, "kernel": 0.01458740234375}, "v_proj": {"bias": 0.0159912109375, "kernel": 0.1259765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.006103515625, "kernel": 0.0615234375}, "output_dense": {"bias": 0.01544189453125, "kernel": 0.1435546875}}, "final_layer_norm": {"bias": 0.009521484375, "scale": 0.0093994140625}, "layer_norm": {"bias": 0.021728515625, "scale": 0.0106201171875}}, "24": {"attention": {"k_proj": {"bias": 4.708766937255859e-06, "kernel": 0.01177978515625}, "out_proj": {"bias": 0.0155029296875, "kernel": 0.1181640625}, "q_proj": {"bias": 0.0009918212890625, "kernel": 0.01123046875}, "v_proj": {"bias": 0.016357421875, "kernel": 0.1220703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00634765625, "kernel": 0.06640625}, "output_dense": {"bias": 0.015869140625, "kernel": 0.146484375}}, "final_layer_norm": {"bias": 0.01025390625, "scale": 0.0101318359375}, "layer_norm": {"bias": 0.022705078125, "scale": 0.015625}}, "25": {"attention": {"k_proj": {"bias": 7.212162017822266e-06, "kernel": 0.017333984375}, "out_proj": {"bias": 0.01708984375, "kernel": 0.150390625}, "q_proj": {"bias": 0.001617431640625, "kernel": 0.017822265625}, "v_proj": {"bias": 0.0184326171875, "kernel": 0.1474609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00830078125, "kernel": 0.0859375}, "output_dense": {"bias": 0.0177001953125, "kernel": 0.1640625}}, "final_layer_norm": {"bias": 0.015625, "scale": 0.041015625}, "layer_norm": {"bias": 0.026123046875, "scale": 0.0205078125}}, "26": {"attention": {"k_proj": {"bias": 4.9173831939697266e-06, "kernel": 0.016845703125}, "out_proj": 
{"bias": 0.018798828125, "kernel": 0.150390625}, "q_proj": {"bias": 0.00164794921875, "kernel": 0.018310546875}, "v_proj": {"bias": 0.0206298828125, "kernel": 0.1669921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0103759765625, "kernel": 0.1044921875}, "output_dense": {"bias": 0.019775390625, "kernel": 0.1787109375}}, "final_layer_norm": {"bias": 0.017578125, "scale": 0.0244140625}, "layer_norm": {"bias": 0.02880859375, "scale": 0.0145263671875}}, "27": {"attention": {"k_proj": {"bias": 1.0848045349121094e-05, "kernel": 0.0185546875}, "out_proj": {"bias": 0.0196533203125, "kernel": 0.17578125}, "q_proj": {"bias": 0.0018463134765625, "kernel": 0.0211181640625}, "v_proj": {"bias": 0.02099609375, "kernel": 0.1767578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0120849609375, "kernel": 0.1123046875}, "output_dense": {"bias": 0.02099609375, "kernel": 0.189453125}}, "final_layer_norm": {"bias": 0.0208740234375, "scale": 0.028076171875}, "layer_norm": {"bias": 0.029541015625, "scale": 0.01495361328125}}, "28": {"attention": {"k_proj": {"bias": 6.794929504394531e-06, "kernel": 0.0172119140625}, "out_proj": {"bias": 0.020751953125, "kernel": 0.185546875}, "q_proj": {"bias": 0.001861572265625, "kernel": 0.02001953125}, "v_proj": {"bias": 0.0230712890625, "kernel": 0.19140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01190185546875, "kernel": 0.11328125}, "output_dense": {"bias": 0.022216796875, "kernel": 0.205078125}}, "final_layer_norm": {"bias": 0.0224609375, "scale": 0.024169921875}, "layer_norm": {"bias": 0.0341796875, "scale": 0.024169921875}}, "29": {"attention": {"k_proj": {"bias": 5.7220458984375e-06, "kernel": 0.0189208984375}, "out_proj": {"bias": 0.0218505859375, "kernel": 0.1884765625}, "q_proj": {"bias": 0.00191497802734375, "kernel": 0.0208740234375}, "v_proj": {"bias": 0.025146484375, "kernel": 0.2109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01141357421875, "kernel": 0.111328125}, "output_dense": {"bias": 
0.0228271484375, "kernel": 0.212890625}}, "final_layer_norm": {"bias": 0.01806640625, "scale": 0.024658203125}, "layer_norm": {"bias": 0.037841796875, "scale": 0.019287109375}}, "3": {"attention": {"k_proj": {"bias": 3.647804260253906e-05, "kernel": 0.03759765625}, "out_proj": {"bias": 0.01361083984375, "kernel": 0.15625}, "q_proj": {"bias": 0.002716064453125, "kernel": 0.039794921875}, "v_proj": {"bias": 0.019287109375, "kernel": 0.1748046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.01446533203125, "kernel": 0.232421875}, "output_dense": {"bias": 0.012451171875, "kernel": 0.173828125}}, "final_layer_norm": {"bias": 0.027099609375, "scale": 0.0322265625}, "layer_norm": {"bias": 0.0286865234375, "scale": 0.020263671875}}, "30": {"attention": {"k_proj": {"bias": 8.463859558105469e-06, "kernel": 0.0213623046875}, "out_proj": {"bias": 0.0238037109375, "kernel": 0.212890625}, "q_proj": {"bias": 0.002105712890625, "kernel": 0.0234375}, "v_proj": {"bias": 0.027099609375, "kernel": 0.244140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01226806640625, "kernel": 0.12255859375}, "output_dense": {"bias": 0.0250244140625, "kernel": 0.2294921875}}, "final_layer_norm": {"bias": 0.020751953125, "scale": 0.031005859375}, "layer_norm": {"bias": 0.037353515625, "scale": 0.020263671875}}, "31": {"attention": {"k_proj": {"bias": 1.4901161193847656e-05, "kernel": 0.04736328125}, "out_proj": {"bias": 0.0269775390625, "kernel": 0.29296875}, "q_proj": {"bias": 0.00433349609375, "kernel": 0.05224609375}, "v_proj": {"bias": 0.03271484375, "kernel": 0.314453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0137939453125, "kernel": 0.140625}, "output_dense": {"bias": 0.0284423828125, "kernel": 0.2578125}}, "final_layer_norm": {"bias": 0.02294921875, "scale": 0.02880859375}, "layer_norm": {"bias": 0.048583984375, "scale": 0.025634765625}}, "32": {"attention": {"k_proj": {"bias": 1.2040138244628906e-05, "kernel": 0.0299072265625}, "out_proj": {"bias": 0.0283203125, 
"kernel": 0.30078125}, "q_proj": {"bias": 0.00274658203125, "kernel": 0.032470703125}, "v_proj": {"bias": 0.03271484375, "kernel": 0.314453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.01611328125, "kernel": 0.171875}, "output_dense": {"bias": 0.030517578125, "kernel": 0.2734375}}, "final_layer_norm": {"bias": 0.0263671875, "scale": 0.030029296875}, "layer_norm": {"bias": 0.044921875, "scale": 0.0439453125}}, "33": {"attention": {"k_proj": {"bias": 1.0013580322265625e-05, "kernel": 0.03466796875}, "out_proj": {"bias": 0.031494140625, "kernel": 0.361328125}, "q_proj": {"bias": 0.002899169921875, "kernel": 0.0380859375}, "v_proj": {"bias": 0.037109375, "kernel": 0.384765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.021240234375, "kernel": 0.23046875}, "output_dense": {"bias": 0.03466796875, "kernel": 0.30859375}}, "final_layer_norm": {"bias": 0.035400390625, "scale": 0.058349609375}, "layer_norm": {"bias": 0.0498046875, "scale": 0.037109375}}, "34": {"attention": {"k_proj": {"bias": 1.1563301086425781e-05, "kernel": 0.043701171875}, "out_proj": {"bias": 0.0322265625, "kernel": 0.404296875}, "q_proj": {"bias": 0.00384521484375, "kernel": 0.048583984375}, "v_proj": {"bias": 0.039794921875, "kernel": 0.443359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0263671875, "kernel": 0.322265625}, "output_dense": {"bias": 0.035888671875, "kernel": 0.33984375}}, "final_layer_norm": {"bias": 0.044189453125, "scale": 0.07373046875}, "layer_norm": {"bias": 0.05908203125, "scale": 0.04638671875}}, "35": {"attention": {"k_proj": {"bias": 4.38690185546875e-05, "kernel": 0.0615234375}, "out_proj": {"bias": 0.037353515625, "kernel": 0.640625}, "q_proj": {"bias": 0.00482177734375, "kernel": 0.0703125}, "v_proj": {"bias": 0.044921875, "kernel": 0.6015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.031982421875, "kernel": 0.423828125}, "output_dense": {"bias": 0.0400390625, "kernel": 0.38671875}}, "final_layer_norm": {"bias": 0.052001953125, 
"scale": 0.052001953125}, "layer_norm": {"bias": 0.0693359375, "scale": 0.041259765625}}, "36": {"attention": {"k_proj": {"bias": 2.6941299438476562e-05, "kernel": 0.06640625}, "out_proj": {"bias": 0.04052734375, "kernel": 0.7421875}, "q_proj": {"bias": 0.005157470703125, "kernel": 0.08154296875}, "v_proj": {"bias": 0.0478515625, "kernel": 0.640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.039306640625, "kernel": 0.5390625}, "output_dense": {"bias": 0.04345703125, "kernel": 0.443359375}}, "final_layer_norm": {"bias": 0.0634765625, "scale": 0.04931640625}, "layer_norm": {"bias": 0.0732421875, "scale": 0.05419921875}}, "37": {"attention": {"k_proj": {"bias": 2.6702880859375e-05, "kernel": 0.0859375}, "out_proj": {"bias": 0.04296875, "kernel": 0.8515625}, "q_proj": {"bias": 0.006744384765625, "kernel": 0.099609375}, "v_proj": {"bias": 0.06103515625, "kernel": 0.83984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.05224609375, "kernel": 0.75}, "output_dense": {"bias": 0.045166015625, "kernel": 0.50390625}}, "final_layer_norm": {"bias": 0.087890625, "scale": 0.0625}, "layer_norm": {"bias": 0.09912109375, "scale": 0.091796875}}, "38": {"attention": {"k_proj": {"bias": 2.8967857360839844e-05, "kernel": 0.10546875}, "out_proj": {"bias": 0.0419921875, "kernel": 0.90625}, "q_proj": {"bias": 0.0076904296875, "kernel": 0.119140625}, "v_proj": {"bias": 0.06005859375, "kernel": 0.859375}}, "feed_forward": {"intermediate_dense": {"bias": 0.05859375, "kernel": 0.9296875}, "output_dense": {"bias": 0.04248046875, "kernel": 0.59375}}, "final_layer_norm": {"bias": 0.10205078125, "scale": 0.08056640625}, "layer_norm": {"bias": 0.1015625, "scale": 0.064453125}}, "39": {"attention": {"k_proj": {"bias": 2.9325485229492188e-05, "kernel": 0.140625}, "out_proj": {"bias": 0.0380859375, "kernel": 0.953125}, "q_proj": {"bias": 0.007568359375, "kernel": 0.138671875}, "v_proj": {"bias": 0.05224609375, "kernel": 0.85546875}}, "feed_forward": {"intermediate_dense": {"bias": 
0.05029296875, "kernel": 0.890625}, "output_dense": {"bias": 0.03759765625, "kernel": 0.828125}}, "final_layer_norm": {"bias": 0.0791015625, "scale": 0.0732421875}, "layer_norm": {"bias": 0.09375, "scale": 0.10546875}}, "4": {"attention": {"k_proj": {"bias": 6.103515625e-05, "kernel": 0.0517578125}, "out_proj": {"bias": 0.0128173828125, "kernel": 0.1689453125}, "q_proj": {"bias": 0.003570556640625, "kernel": 0.05419921875}, "v_proj": {"bias": 0.01806640625, "kernel": 0.1923828125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0140380859375, "kernel": 0.2177734375}, "output_dense": {"bias": 0.01214599609375, "kernel": 0.16796875}}, "final_layer_norm": {"bias": 0.024169921875, "scale": 0.023193359375}, "layer_norm": {"bias": 0.0269775390625, "scale": 0.0198974609375}}, "40": {"attention": {"k_proj": {"bias": 2.372264862060547e-05, "kernel": 0.10791015625}, "out_proj": {"bias": 0.035400390625, "kernel": 1.1015625}, "q_proj": {"bias": 0.005859375, "kernel": 0.1162109375}, "v_proj": {"bias": 0.048828125, "kernel": 0.9375}}, "feed_forward": {"intermediate_dense": {"bias": 0.050537109375, "kernel": 0.9921875}, "output_dense": {"bias": 0.0361328125, "kernel": 0.6953125}}, "final_layer_norm": {"bias": 0.09033203125, "scale": 0.1103515625}, "layer_norm": {"bias": 0.08203125, "scale": 0.0693359375}}, "41": {"attention": {"k_proj": {"bias": 2.384185791015625e-05, "kernel": 0.11328125}, "out_proj": {"bias": 0.0306396484375, "kernel": 0.9375}, "q_proj": {"bias": 0.0068359375, "kernel": 0.1572265625}, "v_proj": {"bias": 0.046875, "kernel": 0.9609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.044921875, "kernel": 0.99609375}, "output_dense": {"bias": 0.032470703125, "kernel": 0.81640625}}, "final_layer_norm": {"bias": 0.0830078125, "scale": 0.07421875}, "layer_norm": {"bias": 0.07470703125, "scale": 0.0927734375}}, "42": {"attention": {"k_proj": {"bias": 2.2411346435546875e-05, "kernel": 0.09521484375}, "out_proj": {"bias": 0.0308837890625, "kernel": 0.875}, 
"q_proj": {"bias": 0.0064697265625, "kernel": 0.14453125}, "v_proj": {"bias": 0.041015625, "kernel": 0.8515625}}, "feed_forward": {"intermediate_dense": {"bias": 0.042724609375, "kernel": 1.0078125}, "output_dense": {"bias": 0.032470703125, "kernel": 0.80078125}}, "final_layer_norm": {"bias": 0.076171875, "scale": 0.08544921875}, "layer_norm": {"bias": 0.06787109375, "scale": 0.09228515625}}, "43": {"attention": {"k_proj": {"bias": 2.1457672119140625e-05, "kernel": 0.060546875}, "out_proj": {"bias": 0.031494140625, "kernel": 0.765625}, "q_proj": {"bias": 0.004058837890625, "kernel": 0.08642578125}, "v_proj": {"bias": 0.04248046875, "kernel": 0.8046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.04736328125, "kernel": 1.21875}, "output_dense": {"bias": 0.03515625, "kernel": 0.79296875}}, "final_layer_norm": {"bias": 0.0751953125, "scale": 0.08349609375}, "layer_norm": {"bias": 0.0830078125, "scale": 0.103515625}}, "44": {"attention": {"k_proj": {"bias": 1.0073184967041016e-05, "kernel": 0.083984375}, "out_proj": {"bias": 0.0341796875, "kernel": 0.7734375}, "q_proj": {"bias": 0.00628662109375, "kernel": 0.12890625}, "v_proj": {"bias": 0.0419921875, "kernel": 0.81640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.04248046875, "kernel": 1.203125}, "output_dense": {"bias": 0.03759765625, "kernel": 0.86328125}}, "final_layer_norm": {"bias": 0.068359375, "scale": 0.06005859375}, "layer_norm": {"bias": 0.078125, "scale": 0.08203125}}, "45": {"attention": {"k_proj": {"bias": 1.1682510375976562e-05, "kernel": 0.150390625}, "out_proj": {"bias": 0.035400390625, "kernel": 0.84375}, "q_proj": {"bias": 0.01275634765625, "kernel": 0.2578125}, "v_proj": {"bias": 0.04296875, "kernel": 0.8125}}, "feed_forward": {"intermediate_dense": {"bias": 0.038330078125, "kernel": 1.109375}, "output_dense": {"bias": 0.03662109375, "kernel": 0.9296875}}, "final_layer_norm": {"bias": 0.0576171875, "scale": 0.06787109375}, "layer_norm": {"bias": 0.09765625, "scale": 0.0849609375}}, 
"46": {"attention": {"k_proj": {"bias": 1.3947486877441406e-05, "kernel": 0.3203125}, "out_proj": {"bias": 0.032958984375, "kernel": 0.80078125}, "q_proj": {"bias": 0.01422119140625, "kernel": 0.283203125}, "v_proj": {"bias": 0.04736328125, "kernel": 0.8671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0283203125, "kernel": 0.71484375}, "output_dense": {"bias": 0.0322265625, "kernel": 0.64453125}}, "final_layer_norm": {"bias": 0.044677734375, "scale": 0.05029296875}, "layer_norm": {"bias": 0.1279296875, "scale": 0.1484375}}, "47": {"attention": {"k_proj": {"bias": 8.225440979003906e-06, "kernel": 0.107421875}, "out_proj": {"bias": 0.03271484375, "kernel": 0.53515625}, "q_proj": {"bias": 0.007171630859375, "kernel": 0.1298828125}, "v_proj": {"bias": 0.052734375, "kernel": 0.90625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0247802734375, "kernel": 0.5}, "output_dense": {"bias": 0.03076171875, "kernel": 0.4921875}}, "final_layer_norm": {"bias": 0.041015625, "scale": 0.04052734375}, "layer_norm": {"bias": 0.1416015625, "scale": 0.125}}, "5": {"attention": {"k_proj": {"bias": 2.574920654296875e-05, "kernel": 0.035400390625}, "out_proj": {"bias": 0.0133056640625, "kernel": 0.12060546875}, "q_proj": {"bias": 0.0024871826171875, "kernel": 0.03955078125}, "v_proj": {"bias": 0.019287109375, "kernel": 0.1552734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0137939453125, "kernel": 0.203125}, "output_dense": {"bias": 0.0128173828125, "kernel": 0.16015625}}, "final_layer_norm": {"bias": 0.023681640625, "scale": 0.021484375}, "layer_norm": {"bias": 0.0286865234375, "scale": 0.0244140625}}, "6": {"attention": {"k_proj": {"bias": 3.123283386230469e-05, "kernel": 0.0390625}, "out_proj": {"bias": 0.01300048828125, "kernel": 0.1455078125}, "q_proj": {"bias": 0.002593994140625, "kernel": 0.042236328125}, "v_proj": {"bias": 0.0186767578125, "kernel": 0.1708984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01324462890625, "kernel": 0.1962890625}, 
"output_dense": {"bias": 0.01324462890625, "kernel": 0.154296875}}, "final_layer_norm": {"bias": 0.02294921875, "scale": 0.0220947265625}, "layer_norm": {"bias": 0.0272216796875, "scale": 0.01806640625}}, "7": {"attention": {"k_proj": {"bias": 8.869171142578125e-05, "kernel": 0.05859375}, "out_proj": {"bias": 0.01361083984375, "kernel": 0.162109375}, "q_proj": {"bias": 0.003631591796875, "kernel": 0.05712890625}, "v_proj": {"bias": 0.021484375, "kernel": 0.2119140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01348876953125, "kernel": 0.203125}, "output_dense": {"bias": 0.01275634765625, "kernel": 0.154296875}}, "final_layer_norm": {"bias": 0.026123046875, "scale": 0.0201416015625}, "layer_norm": {"bias": 0.035888671875, "scale": 0.02978515625}}, "8": {"attention": {"k_proj": {"bias": 5.793571472167969e-05, "kernel": 0.06298828125}, "out_proj": {"bias": 0.013671875, "kernel": 0.154296875}, "q_proj": {"bias": 0.00408935546875, "kernel": 0.06201171875}, "v_proj": {"bias": 0.0198974609375, "kernel": 0.19140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01556396484375, "kernel": 0.224609375}, "output_dense": {"bias": 0.01544189453125, "kernel": 0.1748046875}}, "final_layer_norm": {"bias": 0.029541015625, "scale": 0.02197265625}, "layer_norm": {"bias": 0.033203125, "scale": 0.0252685546875}}, "9": {"attention": {"k_proj": {"bias": 4.1484832763671875e-05, "kernel": 0.10205078125}, "out_proj": {"bias": 0.0135498046875, "kernel": 0.189453125}, "q_proj": {"bias": 0.00921630859375, "kernel": 0.1337890625}, "v_proj": {"bias": 0.0224609375, "kernel": 0.2255859375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0150146484375, "kernel": 0.216796875}, "output_dense": {"bias": 0.01177978515625, "kernel": 0.1533203125}}, "final_layer_norm": {"bias": 0.0283203125, "scale": 0.021484375}, "layer_norm": {"bias": 0.042724609375, "scale": 0.09716796875}}}, "pos_conv_embed": {"conv": {"bias": 0.02294921875, "weight_g": 0.017578125, "weight_v": 0.16796875}}}, 
"feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.052734375, "scale": 0.0634765625}, "projection": {"bias": 0.03271484375, "kernel": 0.53125}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.000513934064656496, "kernel": 4.458161354064941}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.8087399005889893, "scale": 22.21056365966797}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.004918951541185379, "kernel": 25.907794952392578}, "out_proj": {"bias": 1.5435106754302979, "kernel": 25.071956634521484}, "q_proj": {"bias": 1.300584077835083, "kernel": 26.182353973388672}, "v_proj": {"bias": 0.34525907039642334, "kernel": 25.80518341064453}}, "feed_forward": {"intermediate_dense": {"bias": 1.7575452327728271, "kernel": 95.10066986083984}, "output_dense": {"bias": 1.0201668739318848, "kernel": 90.88314819335938}}, "final_layer_norm": {"bias": 1.2844293117523193, "scale": 19.878904342651367}, "layer_norm": {"bias": 3.290097951889038, "scale": 16.03858184814453}}, "1": {"attention": {"k_proj": {"bias": 0.0072013214230537415, "kernel": 40.25491714477539}, "out_proj": {"bias": 1.29144287109375, "kernel": 41.6448860168457}, "q_proj": {"bias": 2.8571319580078125, "kernel": 40.09453201293945}, "v_proj": {"bias": 0.28102225065231323, "kernel": 40.13139343261719}}, "feed_forward": {"intermediate_dense": 
{"bias": 1.5771745443344116, "kernel": 93.18878173828125}, "output_dense": {"bias": 0.803621768951416, "kernel": 84.10867309570312}}, "final_layer_norm": {"bias": 1.1331145763397217, "scale": 18.403972625732422}, "layer_norm": {"bias": 1.7383671998977661, "scale": 19.385173797607422}}, "10": {"attention": {"k_proj": {"bias": 0.028601065278053284, "kernel": 47.28178405761719}, "out_proj": {"bias": 1.2180893421173096, "kernel": 50.115089416503906}, "q_proj": {"bias": 2.4192051887512207, "kernel": 47.243019104003906}, "v_proj": {"bias": 0.315180242061615, "kernel": 50.312808990478516}}, "feed_forward": {"intermediate_dense": {"bias": 1.6207945346832275, "kernel": 97.54641723632812}, "output_dense": {"bias": 0.5638551712036133, "kernel": 91.48897552490234}}, "final_layer_norm": {"bias": 2.1993794441223145, "scale": 20.35513687133789}, "layer_norm": {"bias": 1.6890054941177368, "scale": 22.307621002197266}}, "11": {"attention": {"k_proj": {"bias": 0.09233956038951874, "kernel": 47.06513214111328}, "out_proj": {"bias": 1.0699727535247803, "kernel": 49.30004119873047}, "q_proj": {"bias": 2.4686877727508545, "kernel": 46.789527893066406}, "v_proj": {"bias": 0.3560459613800049, "kernel": 49.84272003173828}}, "feed_forward": {"intermediate_dense": {"bias": 1.6689014434814453, "kernel": 98.29637908935547}, "output_dense": {"bias": 0.5470572710037231, "kernel": 93.20014953613281}}, "final_layer_norm": {"bias": 2.178788661956787, "scale": 20.362865447998047}, "layer_norm": {"bias": 1.673203945159912, "scale": 22.604415893554688}}, "12": {"attention": {"k_proj": {"bias": 0.03298710286617279, "kernel": 47.65814971923828}, "out_proj": {"bias": 1.0556013584136963, "kernel": 49.61164474487305}, "q_proj": {"bias": 2.355423927307129, "kernel": 47.41543960571289}, "v_proj": {"bias": 0.34211331605911255, "kernel": 50.02819061279297}}, "feed_forward": {"intermediate_dense": {"bias": 1.712772250175476, "kernel": 99.13731384277344}, "output_dense": {"bias": 0.5366638898849487, "kernel": 
94.74702453613281}}, "final_layer_norm": {"bias": 2.137502908706665, "scale": 20.325902938842773}, "layer_norm": {"bias": 1.7379934787750244, "scale": 23.156574249267578}}, "13": {"attention": {"k_proj": {"bias": 0.06237822026014328, "kernel": 49.54319763183594}, "out_proj": {"bias": 1.0485777854919434, "kernel": 49.24932098388672}, "q_proj": {"bias": 2.3326220512390137, "kernel": 49.39902114868164}, "v_proj": {"bias": 0.36998122930526733, "kernel": 49.42940139770508}}, "feed_forward": {"intermediate_dense": {"bias": 1.7665362358093262, "kernel": 99.74223327636719}, "output_dense": {"bias": 0.5536751747131348, "kernel": 95.18029022216797}}, "final_layer_norm": {"bias": 2.0175793170928955, "scale": 20.470375061035156}, "layer_norm": {"bias": 1.8363455533981323, "scale": 23.375064849853516}}, "14": {"attention": {"k_proj": {"bias": 0.14924587309360504, "kernel": 49.75286865234375}, "out_proj": {"bias": 1.2119272947311401, "kernel": 47.69355392456055}, "q_proj": {"bias": 2.3951995372772217, "kernel": 49.8133544921875}, "v_proj": {"bias": 0.3722843825817108, "kernel": 47.274627685546875}}, "feed_forward": {"intermediate_dense": {"bias": 1.8020305633544922, "kernel": 100.36180114746094}, "output_dense": {"bias": 0.5680183172225952, "kernel": 96.54590606689453}}, "final_layer_norm": {"bias": 2.1541244983673096, "scale": 20.61456298828125}, "layer_norm": {"bias": 1.9671568870544434, "scale": 23.55438804626465}}, "15": {"attention": {"k_proj": {"bias": 0.07449370622634888, "kernel": 49.87868881225586}, "out_proj": {"bias": 1.2609732151031494, "kernel": 48.29032897949219}, "q_proj": {"bias": 2.545073986053467, "kernel": 49.94525909423828}, "v_proj": {"bias": 0.40386444330215454, "kernel": 47.93647766113281}}, "feed_forward": {"intermediate_dense": {"bias": 1.8114612102508545, "kernel": 100.1701431274414}, "output_dense": {"bias": 0.7159097194671631, "kernel": 97.22415161132812}}, "final_layer_norm": {"bias": 2.0759165287017822, "scale": 20.706335067749023}, "layer_norm": 
{"bias": 2.215679168701172, "scale": 23.692293167114258}}, "16": {"attention": {"k_proj": {"bias": 0.028690317645668983, "kernel": 49.789791107177734}, "out_proj": {"bias": 1.1964430809020996, "kernel": 47.762840270996094}, "q_proj": {"bias": 2.625481128692627, "kernel": 49.67322540283203}, "v_proj": {"bias": 0.3599017262458801, "kernel": 47.443382263183594}}, "feed_forward": {"intermediate_dense": {"bias": 1.8105230331420898, "kernel": 100.82234191894531}, "output_dense": {"bias": 0.7385136485099792, "kernel": 98.08937072753906}}, "final_layer_norm": {"bias": 2.156309127807617, "scale": 21.189599990844727}, "layer_norm": {"bias": 2.149183750152588, "scale": 22.610759735107422}}, "17": {"attention": {"k_proj": {"bias": 0.015740085393190384, "kernel": 50.001319885253906}, "out_proj": {"bias": 1.139474630355835, "kernel": 47.080406188964844}, "q_proj": {"bias": 2.6962077617645264, "kernel": 50.104488372802734}, "v_proj": {"bias": 0.3982570171356201, "kernel": 46.750732421875}}, "feed_forward": {"intermediate_dense": {"bias": 1.8210697174072266, "kernel": 101.90509796142578}, "output_dense": {"bias": 0.7553049921989441, "kernel": 98.48403930664062}}, "final_layer_norm": {"bias": 2.240560531616211, "scale": 21.749385833740234}, "layer_norm": {"bias": 2.0655288696289062, "scale": 22.176105499267578}}, "18": {"attention": {"k_proj": {"bias": 0.0625796914100647, "kernel": 50.267364501953125}, "out_proj": {"bias": 1.2416081428527832, "kernel": 48.100120544433594}, "q_proj": {"bias": 2.5947299003601074, "kernel": 50.652549743652344}, "v_proj": {"bias": 0.4260401725769043, "kernel": 47.62586212158203}}, "feed_forward": {"intermediate_dense": {"bias": 1.8635938167572021, "kernel": 102.20716857910156}, "output_dense": {"bias": 0.8702353835105896, "kernel": 100.12542724609375}}, "final_layer_norm": {"bias": 2.342595100402832, "scale": 21.712392807006836}, "layer_norm": {"bias": 2.2413687705993652, "scale": 23.877422332763672}}, "19": {"attention": {"k_proj": {"bias": 
0.008574407547712326, "kernel": 49.54846954345703}, "out_proj": {"bias": 1.2168827056884766, "kernel": 47.98948287963867}, "q_proj": {"bias": 2.867854595184326, "kernel": 49.98030090332031}, "v_proj": {"bias": 0.38979336619377136, "kernel": 47.23333740234375}}, "feed_forward": {"intermediate_dense": {"bias": 1.9191184043884277, "kernel": 102.78443908691406}, "output_dense": {"bias": 0.9342584609985352, "kernel": 101.0276107788086}}, "final_layer_norm": {"bias": 2.3034279346466064, "scale": 22.071582794189453}, "layer_norm": {"bias": 2.1650390625, "scale": 23.092153549194336}}, "2": {"attention": {"k_proj": {"bias": 0.03653004392981529, "kernel": 46.15614318847656}, "out_proj": {"bias": 1.2130026817321777, "kernel": 43.853614807128906}, "q_proj": {"bias": 3.041682243347168, "kernel": 45.9248046875}, "v_proj": {"bias": 0.30890217423439026, "kernel": 43.85284423828125}}, "feed_forward": {"intermediate_dense": {"bias": 1.6167690753936768, "kernel": 98.25933074951172}, "output_dense": {"bias": 0.6920140981674194, "kernel": 87.242431640625}}, "final_layer_norm": {"bias": 1.4544155597686768, "scale": 20.98486328125}, "layer_norm": {"bias": 1.667765736579895, "scale": 22.059188842773438}}, "20": {"attention": {"k_proj": {"bias": 0.0037271142937242985, "kernel": 49.5183219909668}, "out_proj": {"bias": 1.2461135387420654, "kernel": 47.375213623046875}, "q_proj": {"bias": 2.7794594764709473, "kernel": 50.30613327026367}, "v_proj": {"bias": 0.3623378872871399, "kernel": 46.273372650146484}}, "feed_forward": {"intermediate_dense": {"bias": 1.9209908246994019, "kernel": 104.0489501953125}, "output_dense": {"bias": 1.0497362613677979, "kernel": 101.65393829345703}}, "final_layer_norm": {"bias": 2.333451509475708, "scale": 23.015880584716797}, "layer_norm": {"bias": 2.1426877975463867, "scale": 23.235729217529297}}, "21": {"attention": {"k_proj": {"bias": 0.03329595923423767, "kernel": 49.96206283569336}, "out_proj": {"bias": 1.2808051109313965, "kernel": 47.415626525878906}, 
"q_proj": {"bias": 2.7249975204467773, "kernel": 50.80659103393555}, "v_proj": {"bias": 0.4175662398338318, "kernel": 46.52143096923828}}, "feed_forward": {"intermediate_dense": {"bias": 1.9632256031036377, "kernel": 104.24440002441406}, "output_dense": {"bias": 1.1203250885009766, "kernel": 101.97819519042969}}, "final_layer_norm": {"bias": 2.3604397773742676, "scale": 22.662506103515625}, "layer_norm": {"bias": 2.2138638496398926, "scale": 23.515274047851562}}, "22": {"attention": {"k_proj": {"bias": 0.012683916836977005, "kernel": 50.35405349731445}, "out_proj": {"bias": 1.1991708278656006, "kernel": 46.8690185546875}, "q_proj": {"bias": 2.81036376953125, "kernel": 50.73627853393555}, "v_proj": {"bias": 0.37005093693733215, "kernel": 46.736412048339844}}, "feed_forward": {"intermediate_dense": {"bias": 1.8951349258422852, "kernel": 104.63954162597656}, "output_dense": {"bias": 1.1302428245544434, "kernel": 101.25523376464844}}, "final_layer_norm": {"bias": 2.2466814517974854, "scale": 22.182716369628906}, "layer_norm": {"bias": 2.2098731994628906, "scale": 22.525482177734375}}, "23": {"attention": {"k_proj": {"bias": 0.120549276471138, "kernel": 51.46174621582031}, "out_proj": {"bias": 1.3264572620391846, "kernel": 47.87089920043945}, "q_proj": {"bias": 2.6404333114624023, "kernel": 51.57463836669922}, "v_proj": {"bias": 0.5210777521133423, "kernel": 48.51097869873047}}, "feed_forward": {"intermediate_dense": {"bias": 1.8714258670806885, "kernel": 104.43405151367188}, "output_dense": {"bias": 1.108646273612976, "kernel": 102.05201721191406}}, "final_layer_norm": {"bias": 2.491565465927124, "scale": 22.138986587524414}, "layer_norm": {"bias": 2.697974920272827, "scale": 23.734621047973633}}, "24": {"attention": {"k_proj": {"bias": 0.05398482829332352, "kernel": 49.93202590942383}, "out_proj": {"bias": 1.3825275897979736, "kernel": 49.85151290893555}, "q_proj": {"bias": 2.803582191467285, "kernel": 49.924556732177734}, "v_proj": {"bias": 0.47551417350769043, 
"kernel": 49.927547454833984}}, "feed_forward": {"intermediate_dense": {"bias": 1.990923285484314, "kernel": 103.90543365478516}, "output_dense": {"bias": 1.1458380222320557, "kernel": 104.9254150390625}}, "final_layer_norm": {"bias": 2.5995006561279297, "scale": 22.194211959838867}, "layer_norm": {"bias": 2.419203042984009, "scale": 23.26953887939453}}, "25": {"attention": {"k_proj": {"bias": 0.04450356587767601, "kernel": 50.49494934082031}, "out_proj": {"bias": 1.1957685947418213, "kernel": 47.765289306640625}, "q_proj": {"bias": 2.878711223602295, "kernel": 50.281700134277344}, "v_proj": {"bias": 0.5583701133728027, "kernel": 48.30183410644531}}, "feed_forward": {"intermediate_dense": {"bias": 1.8914387226104736, "kernel": 104.17427062988281}, "output_dense": {"bias": 1.0262477397918701, "kernel": 104.85499572753906}}, "final_layer_norm": {"bias": 2.305722713470459, "scale": 22.72964096069336}, "layer_norm": {"bias": 2.583354949951172, "scale": 22.42755126953125}}, "26": {"attention": {"k_proj": {"bias": 0.07086975127458572, "kernel": 50.69786071777344}, "out_proj": {"bias": 1.1329269409179688, "kernel": 48.53632354736328}, "q_proj": {"bias": 2.8333656787872314, "kernel": 50.4635124206543}, "v_proj": {"bias": 0.49598926305770874, "kernel": 49.14326477050781}}, "feed_forward": {"intermediate_dense": {"bias": 1.983544111251831, "kernel": 103.60462951660156}, "output_dense": {"bias": 0.9868446588516235, "kernel": 102.02833557128906}}, "final_layer_norm": {"bias": 1.9362690448760986, "scale": 21.587879180908203}, "layer_norm": {"bias": 2.4858784675598145, "scale": 22.868398666381836}}, "27": {"attention": {"k_proj": {"bias": 0.3732529580593109, "kernel": 51.34492492675781}, "out_proj": {"bias": 1.3583134412765503, "kernel": 49.85731506347656}, "q_proj": {"bias": 2.618594169616699, "kernel": 51.20601272583008}, "v_proj": {"bias": 0.5683501958847046, "kernel": 50.311988830566406}}, "feed_forward": {"intermediate_dense": {"bias": 2.143080472946167, "kernel": 
101.88013458251953}, "output_dense": {"bias": 0.8687618970870972, "kernel": 101.70985412597656}}, "final_layer_norm": {"bias": 2.2165541648864746, "scale": 20.85378646850586}, "layer_norm": {"bias": 2.5526351928710938, "scale": 23.540470123291016}}, "28": {"attention": {"k_proj": {"bias": 0.4096335768699646, "kernel": 52.2803955078125}, "out_proj": {"bias": 1.386794090270996, "kernel": 50.622737884521484}, "q_proj": {"bias": 2.7664031982421875, "kernel": 51.92483901977539}, "v_proj": {"bias": 0.4615659713745117, "kernel": 50.95015335083008}}, "feed_forward": {"intermediate_dense": {"bias": 2.089618682861328, "kernel": 101.88746643066406}, "output_dense": {"bias": 0.7711120843887329, "kernel": 103.90321350097656}}, "final_layer_norm": {"bias": 2.126192092895508, "scale": 21.172107696533203}, "layer_norm": {"bias": 2.054711103439331, "scale": 24.408409118652344}}, "29": {"attention": {"k_proj": {"bias": 0.06762012839317322, "kernel": 48.740055084228516}, "out_proj": {"bias": 1.365987777709961, "kernel": 53.140220642089844}, "q_proj": {"bias": 2.7382171154022217, "kernel": 48.56138610839844}, "v_proj": {"bias": 0.41890132427215576, "kernel": 53.04469299316406}}, "feed_forward": {"intermediate_dense": {"bias": 2.090895175933838, "kernel": 102.560302734375}, "output_dense": {"bias": 0.872062623500824, "kernel": 108.16770935058594}}, "final_layer_norm": {"bias": 2.3700876235961914, "scale": 22.302989959716797}, "layer_norm": {"bias": 2.1496353149414062, "scale": 25.385906219482422}}, "3": {"attention": {"k_proj": {"bias": 0.1204313337802887, "kernel": 50.125701904296875}, "out_proj": {"bias": 1.3624417781829834, "kernel": 46.49477005004883}, "q_proj": {"bias": 2.7182188034057617, "kernel": 50.35295104980469}, "v_proj": {"bias": 0.3000553846359253, "kernel": 46.894187927246094}}, "feed_forward": {"intermediate_dense": {"bias": 1.6320147514343262, "kernel": 99.90219116210938}, "output_dense": {"bias": 0.6522164344787598, "kernel": 90.09829711914062}}, "final_layer_norm": 
{"bias": 1.7125478982925415, "scale": 21.080535888671875}, "layer_norm": {"bias": 1.8284051418304443, "scale": 23.59416961669922}}, "30": {"attention": {"k_proj": {"bias": 0.25504398345947266, "kernel": 50.66249084472656}, "out_proj": {"bias": 1.159855604171753, "kernel": 49.41553497314453}, "q_proj": {"bias": 2.799100637435913, "kernel": 50.74445343017578}, "v_proj": {"bias": 0.48367470502853394, "kernel": 49.75730895996094}}, "feed_forward": {"intermediate_dense": {"bias": 2.026671886444092, "kernel": 103.06732177734375}, "output_dense": {"bias": 0.8243669271469116, "kernel": 107.15858459472656}}, "final_layer_norm": {"bias": 2.1945126056671143, "scale": 23.442333221435547}, "layer_norm": {"bias": 2.301931381225586, "scale": 25.115337371826172}}, "31": {"attention": {"k_proj": {"bias": 0.35389411449432373, "kernel": 49.18547058105469}, "out_proj": {"bias": 1.0851024389266968, "kernel": 50.28276824951172}, "q_proj": {"bias": 2.581451892852783, "kernel": 49.28723907470703}, "v_proj": {"bias": 0.5289448499679565, "kernel": 50.41181182861328}}, "feed_forward": {"intermediate_dense": {"bias": 2.104063034057617, "kernel": 101.75663757324219}, "output_dense": {"bias": 1.002120852470398, "kernel": 104.55183410644531}}, "final_layer_norm": {"bias": 2.082719087600708, "scale": 23.341012954711914}, "layer_norm": {"bias": 2.2975897789001465, "scale": 24.888717651367188}}, "32": {"attention": {"k_proj": {"bias": 0.207120880484581, "kernel": 48.02813720703125}, "out_proj": {"bias": 1.093928337097168, "kernel": 49.468353271484375}, "q_proj": {"bias": 2.8447179794311523, "kernel": 48.01123809814453}, "v_proj": {"bias": 0.39654308557510376, "kernel": 49.75498962402344}}, "feed_forward": {"intermediate_dense": {"bias": 2.033989906311035, "kernel": 100.62407684326172}, "output_dense": {"bias": 1.0630019903182983, "kernel": 103.89726257324219}}, "final_layer_norm": {"bias": 2.044450283050537, "scale": 23.776098251342773}, "layer_norm": {"bias": 2.2476887702941895, "scale": 
25.156360626220703}}, "33": {"attention": {"k_proj": {"bias": 0.2086963802576065, "kernel": 47.967620849609375}, "out_proj": {"bias": 1.1307460069656372, "kernel": 49.31550598144531}, "q_proj": {"bias": 2.9888792037963867, "kernel": 47.95985412597656}, "v_proj": {"bias": 0.42853063344955444, "kernel": 49.58100509643555}}, "feed_forward": {"intermediate_dense": {"bias": 2.041210174560547, "kernel": 99.00303649902344}, "output_dense": {"bias": 1.0359094142913818, "kernel": 102.67219543457031}}, "final_layer_norm": {"bias": 1.9568297863006592, "scale": 23.543582916259766}, "layer_norm": {"bias": 2.442399501800537, "scale": 25.396568298339844}}, "34": {"attention": {"k_proj": {"bias": 0.2259853184223175, "kernel": 47.180938720703125}, "out_proj": {"bias": 1.3792003393173218, "kernel": 50.800682067871094}, "q_proj": {"bias": 2.8644118309020996, "kernel": 47.234779357910156}, "v_proj": {"bias": 0.39719992876052856, "kernel": 50.735965728759766}}, "feed_forward": {"intermediate_dense": {"bias": 2.121295690536499, "kernel": 97.8389892578125}, "output_dense": {"bias": 0.9670619368553162, "kernel": 101.99024963378906}}, "final_layer_norm": {"bias": 1.9000396728515625, "scale": 23.19698715209961}, "layer_norm": {"bias": 2.5240490436553955, "scale": 25.779953002929688}}, "35": {"attention": {"k_proj": {"bias": 0.35745763778686523, "kernel": 48.91041564941406}, "out_proj": {"bias": 1.2976853847503662, "kernel": 49.656803131103516}, "q_proj": {"bias": 2.615135908126831, "kernel": 49.244117736816406}, "v_proj": {"bias": 0.4806705713272095, "kernel": 49.48078155517578}}, "feed_forward": {"intermediate_dense": {"bias": 2.2015764713287354, "kernel": 96.44864654541016}, "output_dense": {"bias": 0.8609927296638489, "kernel": 100.73100280761719}}, "final_layer_norm": {"bias": 1.9790458679199219, "scale": 23.321218490600586}, "layer_norm": {"bias": 2.285153388977051, "scale": 26.278472900390625}}, "36": {"attention": {"k_proj": {"bias": 0.19027814269065857, "kernel": 46.2192497253418}, 
"out_proj": {"bias": 1.3382506370544434, "kernel": 50.997901916503906}, "q_proj": {"bias": 2.6993062496185303, "kernel": 46.20930480957031}, "v_proj": {"bias": 0.36446213722229004, "kernel": 51.18187713623047}}, "feed_forward": {"intermediate_dense": {"bias": 2.0755391120910645, "kernel": 95.54883575439453}, "output_dense": {"bias": 0.8958422541618347, "kernel": 100.42840576171875}}, "final_layer_norm": {"bias": 1.618175745010376, "scale": 23.848108291625977}, "layer_norm": {"bias": 2.0090999603271484, "scale": 25.78015899658203}}, "37": {"attention": {"k_proj": {"bias": 0.5269804000854492, "kernel": 45.260040283203125}, "out_proj": {"bias": 1.599480152130127, "kernel": 50.98283386230469}, "q_proj": {"bias": 2.3939435482025146, "kernel": 45.33077621459961}, "v_proj": {"bias": 0.36002129316329956, "kernel": 50.852684020996094}}, "feed_forward": {"intermediate_dense": {"bias": 1.971336841583252, "kernel": 94.80709075927734}, "output_dense": {"bias": 0.9046251773834229, "kernel": 100.2008285522461}}, "final_layer_norm": {"bias": 1.4468027353286743, "scale": 24.250158309936523}, "layer_norm": {"bias": 1.978513479232788, "scale": 25.821224212646484}}, "38": {"attention": {"k_proj": {"bias": 0.6130686402320862, "kernel": 43.44226837158203}, "out_proj": {"bias": 1.2996063232421875, "kernel": 50.466278076171875}, "q_proj": {"bias": 2.3290963172912598, "kernel": 43.45414733886719}, "v_proj": {"bias": 0.41826310753822327, "kernel": 50.33799743652344}}, "feed_forward": {"intermediate_dense": {"bias": 1.9165147542953491, "kernel": 92.85403442382812}, "output_dense": {"bias": 0.8927580118179321, "kernel": 98.45118713378906}}, "final_layer_norm": {"bias": 1.493051290512085, "scale": 24.96658706665039}, "layer_norm": {"bias": 2.1560826301574707, "scale": 26.53356170654297}}, "39": {"attention": {"k_proj": {"bias": 0.6430894136428833, "kernel": 43.21688461303711}, "out_proj": {"bias": 1.5947363376617432, "kernel": 50.339542388916016}, "q_proj": {"bias": 2.110431671142578, 
"kernel": 43.605262756347656}, "v_proj": {"bias": 0.38870078325271606, "kernel": 50.012779235839844}}, "feed_forward": {"intermediate_dense": {"bias": 1.9106833934783936, "kernel": 91.17467498779297}, "output_dense": {"bias": 0.972097635269165, "kernel": 98.83366394042969}}, "final_layer_norm": {"bias": 1.6390502452850342, "scale": 25.59851837158203}, "layer_norm": {"bias": 2.1347782611846924, "scale": 27.176971435546875}}, "4": {"attention": {"k_proj": {"bias": 0.13430944085121155, "kernel": 52.684295654296875}, "out_proj": {"bias": 1.544208288192749, "kernel": 47.894325256347656}, "q_proj": {"bias": 2.5189812183380127, "kernel": 52.865440368652344}, "v_proj": {"bias": 0.34665393829345703, "kernel": 48.25187683105469}}, "feed_forward": {"intermediate_dense": {"bias": 1.621163010597229, "kernel": 99.48369598388672}, "output_dense": {"bias": 0.8153223991394043, "kernel": 91.32151794433594}}, "final_layer_norm": {"bias": 1.7989122867584229, "scale": 20.61126708984375}, "layer_norm": {"bias": 1.922861099243164, "scale": 23.966323852539062}}, "40": {"attention": {"k_proj": {"bias": 0.5842467546463013, "kernel": 42.578712463378906}, "out_proj": {"bias": 1.5376503467559814, "kernel": 48.99298095703125}, "q_proj": {"bias": 2.046619176864624, "kernel": 43.344635009765625}, "v_proj": {"bias": 0.44114208221435547, "kernel": 48.57102584838867}}, "feed_forward": {"intermediate_dense": {"bias": 1.770835518836975, "kernel": 89.44320678710938}, "output_dense": {"bias": 1.0244522094726562, "kernel": 96.0932846069336}}, "final_layer_norm": {"bias": 1.798896312713623, "scale": 24.869396209716797}, "layer_norm": {"bias": 2.0778615474700928, "scale": 26.723310470581055}}, "41": {"attention": {"k_proj": {"bias": 1.6699845790863037, "kernel": 39.915489196777344}, "out_proj": {"bias": 1.2989803552627563, "kernel": 50.55731964111328}, "q_proj": {"bias": 1.7253488302230835, "kernel": 40.6795654296875}, "v_proj": {"bias": 0.3975880742073059, "kernel": 49.50791931152344}}, "feed_forward": 
{"intermediate_dense": {"bias": 1.9115654230117798, "kernel": 86.23811340332031}, "output_dense": {"bias": 1.0470619201660156, "kernel": 95.14247131347656}}, "final_layer_norm": {"bias": 2.297964096069336, "scale": 28.32220458984375}, "layer_norm": {"bias": 2.1078972816467285, "scale": 28.513172149658203}}, "42": {"attention": {"k_proj": {"bias": 0.7960059642791748, "kernel": 36.708290100097656}, "out_proj": {"bias": 1.3383876085281372, "kernel": 44.78962707519531}, "q_proj": {"bias": 1.5444276332855225, "kernel": 38.05863952636719}, "v_proj": {"bias": 0.5880073308944702, "kernel": 43.13645935058594}}, "feed_forward": {"intermediate_dense": {"bias": 1.6485475301742554, "kernel": 85.23059844970703}, "output_dense": {"bias": 1.0999541282653809, "kernel": 93.34835052490234}}, "final_layer_norm": {"bias": 2.021839141845703, "scale": 29.62232780456543}, "layer_norm": {"bias": 1.5734193325042725, "scale": 27.38504409790039}}, "43": {"attention": {"k_proj": {"bias": 1.2087428569793701, "kernel": 33.226219177246094}, "out_proj": {"bias": 1.3321952819824219, "kernel": 41.184059143066406}, "q_proj": {"bias": 1.3534941673278809, "kernel": 34.040531158447266}, "v_proj": {"bias": 0.5191360712051392, "kernel": 39.07907485961914}}, "feed_forward": {"intermediate_dense": {"bias": 1.6821610927581787, "kernel": 84.4572525024414}, "output_dense": {"bias": 0.8659600019454956, "kernel": 91.27842712402344}}, "final_layer_norm": {"bias": 1.9458153247833252, "scale": 31.839672088623047}, "layer_norm": {"bias": 1.6934361457824707, "scale": 25.536218643188477}}, "44": {"attention": {"k_proj": {"bias": 2.490312099456787, "kernel": 33.81727600097656}, "out_proj": {"bias": 1.0943963527679443, "kernel": 44.90924072265625}, "q_proj": {"bias": 1.2867296934127808, "kernel": 34.189945220947266}, "v_proj": {"bias": 0.3796514570713043, "kernel": 44.002098083496094}}, "feed_forward": {"intermediate_dense": {"bias": 1.761549472808838, "kernel": 83.41246795654297}, "output_dense": {"bias": 
0.8125085830688477, "kernel": 88.93128967285156}}, "final_layer_norm": {"bias": 1.9331786632537842, "scale": 34.012088775634766}, "layer_norm": {"bias": 1.586810827255249, "scale": 25.554336547851562}}, "45": {"attention": {"k_proj": {"bias": 2.048306465148926, "kernel": 33.657684326171875}, "out_proj": {"bias": 0.9800734519958496, "kernel": 48.50779724121094}, "q_proj": {"bias": 1.3633925914764404, "kernel": 33.844757080078125}, "v_proj": {"bias": 0.43038854002952576, "kernel": 48.665504455566406}}, "feed_forward": {"intermediate_dense": {"bias": 1.8786242008209229, "kernel": 80.08413696289062}, "output_dense": {"bias": 0.9476521015167236, "kernel": 84.32176208496094}}, "final_layer_norm": {"bias": 1.676342487335205, "scale": 32.72097396850586}, "layer_norm": {"bias": 1.5172195434570312, "scale": 24.071025848388672}}, "46": {"attention": {"k_proj": {"bias": 1.5383708477020264, "kernel": 34.8392333984375}, "out_proj": {"bias": 0.7450060844421387, "kernel": 50.938255310058594}, "q_proj": {"bias": 1.532208800315857, "kernel": 34.96090316772461}, "v_proj": {"bias": 0.371351957321167, "kernel": 51.6893196105957}}, "feed_forward": {"intermediate_dense": {"bias": 1.9411137104034424, "kernel": 74.41993713378906}, "output_dense": {"bias": 1.1016592979431152, "kernel": 74.62105560302734}}, "final_layer_norm": {"bias": 1.6790804862976074, "scale": 28.232065200805664}, "layer_norm": {"bias": 1.3348368406295776, "scale": 22.986331939697266}}, "47": {"attention": {"k_proj": {"bias": 0.25895360112190247, "kernel": 37.09356689453125}, "out_proj": {"bias": 0.6301657557487488, "kernel": 45.20797348022461}, "q_proj": {"bias": 1.650336742401123, "kernel": 37.740478515625}, "v_proj": {"bias": 0.34607622027397156, "kernel": 46.190757751464844}}, "feed_forward": {"intermediate_dense": {"bias": 1.9951748847961426, "kernel": 71.75222778320312}, "output_dense": {"bias": 0.6057071685791016, "kernel": 68.11976623535156}}, "final_layer_norm": {"bias": 1.520125150680542, "scale": 
23.069990158081055}, "layer_norm": {"bias": 1.0598437786102295, "scale": 20.234466552734375}}, "5": {"attention": {"k_proj": {"bias": 0.00840836763381958, "kernel": 48.02847671508789}, "out_proj": {"bias": 1.5279333591461182, "kernel": 49.12567138671875}, "q_proj": {"bias": 2.6153364181518555, "kernel": 48.17101287841797}, "v_proj": {"bias": 0.3096908926963806, "kernel": 49.92131805419922}}, "feed_forward": {"intermediate_dense": {"bias": 1.5453948974609375, "kernel": 99.59476470947266}, "output_dense": {"bias": 0.8454799652099609, "kernel": 90.61784362792969}}, "final_layer_norm": {"bias": 2.0749590396881104, "scale": 20.825382232666016}, "layer_norm": {"bias": 1.95145845413208, "scale": 23.389982223510742}}, "6": {"attention": {"k_proj": {"bias": 0.20033928751945496, "kernel": 49.63896179199219}, "out_proj": {"bias": 1.5183320045471191, "kernel": 48.44209289550781}, "q_proj": {"bias": 2.6636147499084473, "kernel": 50.118431091308594}, "v_proj": {"bias": 0.31253746151924133, "kernel": 48.97197723388672}}, "feed_forward": {"intermediate_dense": {"bias": 1.5235824584960938, "kernel": 98.68842315673828}, "output_dense": {"bias": 0.6965881586074829, "kernel": 90.20870971679688}}, "final_layer_norm": {"bias": 2.375554323196411, "scale": 20.302406311035156}, "layer_norm": {"bias": 1.9553532600402832, "scale": 23.748838424682617}}, "7": {"attention": {"k_proj": {"bias": 0.19373320043087006, "kernel": 49.441287994384766}, "out_proj": {"bias": 1.3339287042617798, "kernel": 48.69111633300781}, "q_proj": {"bias": 2.439426898956299, "kernel": 49.83055877685547}, "v_proj": {"bias": 0.39850425720214844, "kernel": 48.65386199951172}}, "feed_forward": {"intermediate_dense": {"bias": 1.529695987701416, "kernel": 98.43458557128906}, "output_dense": {"bias": 0.5387250185012817, "kernel": 89.9488525390625}}, "final_layer_norm": {"bias": 2.214036464691162, "scale": 20.541275024414062}, "layer_norm": {"bias": 1.8610401153564453, "scale": 22.47254753112793}}, "8": {"attention": 
{"k_proj": {"bias": 0.1718023270368576, "kernel": 48.945213317871094}, "out_proj": {"bias": 1.1568043231964111, "kernel": 49.24149703979492}, "q_proj": {"bias": 2.4160404205322266, "kernel": 48.7120361328125}, "v_proj": {"bias": 0.3257639408111572, "kernel": 49.424400329589844}}, "feed_forward": {"intermediate_dense": {"bias": 1.5825791358947754, "kernel": 98.04054260253906}, "output_dense": {"bias": 0.4940677881240845, "kernel": 89.3731689453125}}, "final_layer_norm": {"bias": 2.1672048568725586, "scale": 20.330373764038086}, "layer_norm": {"bias": 1.7923117876052856, "scale": 22.934362411499023}}, "9": {"attention": {"k_proj": {"bias": 0.20644523203372955, "kernel": 49.55120849609375}, "out_proj": {"bias": 1.357471227645874, "kernel": 50.029640197753906}, "q_proj": {"bias": 2.3735475540161133, "kernel": 49.710205078125}, "v_proj": {"bias": 0.3331097960472107, "kernel": 50.43877410888672}}, "feed_forward": {"intermediate_dense": {"bias": 1.662867546081543, "kernel": 96.64881896972656}, "output_dense": {"bias": 0.6358088254928589, "kernel": 89.91731262207031}}, "final_layer_norm": {"bias": 2.056180477142334, "scale": 19.610380172729492}, "layer_norm": {"bias": 1.8849740028381348, "scale": 24.290634155273438}}}, "pos_conv_embed": {"conv": {"bias": 5.547986030578613, "weight_g": 8.80840015411377, "weight_v": 84.6180648803711}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 
0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.33607292175293, "scale": 16.545515060424805}, "projection": {"bias": 1.6633964776992798, "kernel": 34.67955017089844}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 2.4749970179982483e-06, "train/loss": 3.565929889678955, "train/param_norm": 1185.912109375, "_runtime": 3511, "_timestamp": 1659188616, "_step": 100, "_wandb": {"runtime": 3512}} \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log b/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ecc8595371217f43b00a149894bc31bfb7564acd --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log @@ -0,0 +1,1110 @@ +2022-07-30 12:45:06,617 INFO MainThread:3213310 [internal.py:wandb_internal():87] W&B internal server running at pid: 3213310, started at: 2022-07-30 12:45:06.617158 +2022-07-30 12:45:06,619 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 12:45:06,619 INFO WriterThread:3213310 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb +2022-07-30 12:45:06,620 DEBUG SenderThread:3213310 [sender.py:send():234] send: header +2022-07-30 12:45:06,620 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: check_version +2022-07-30 12:45:06,657 DEBUG SenderThread:3213310 [sender.py:send():234] send: run +2022-07-30 12:45:06,845 INFO SenderThread:3213310 
[dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files +2022-07-30 12:45:06,845 INFO SenderThread:3213310 [sender.py:_start_run_threads():804] run started: 101ubxa3 with start time 1659185105 +2022-07-30 12:45:06,845 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 12:45:06,845 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:45:06,846 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 12:45:07,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:__init__():40] meta init +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:__init__():54] meta init done +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:probe():214] probe +2022-07-30 12:45:09,203 DEBUG HandlerThread:3213310 [meta.py:_setup_git():204] setup git +2022-07-30 12:45:09,243 DEBUG HandlerThread:3213310 [meta.py:_setup_git():211] setup git done +2022-07-30 12:45:09,243 DEBUG HandlerThread:3213310 [meta.py:_save_code():92] save code +2022-07-30 12:45:09,256 DEBUG HandlerThread:3213310 [meta.py:_save_code():113] save code done +2022-07-30 12:45:09,256 DEBUG HandlerThread:3213310 [meta.py:_save_patches():130] save patches +2022-07-30 12:45:09,329 DEBUG HandlerThread:3213310 [meta.py:_save_patches():172] save patches done +2022-07-30 12:45:09,329 DEBUG HandlerThread:3213310 [meta.py:_save_pip():58] save pip +2022-07-30 12:45:09,330 DEBUG HandlerThread:3213310 [meta.py:_save_pip():72] save pip done +2022-07-30 12:45:09,330 DEBUG HandlerThread:3213310 [meta.py:probe():252] probe done +2022-07-30 12:45:09,333 DEBUG SenderThread:3213310 [sender.py:send():234] send: files +2022-07-30 12:45:09,334 INFO 
SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 12:45:09,334 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 12:45:09,335 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 12:45:09,340 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:09,341 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:09,793 INFO Thread-11 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/2jcn1x2y-wandb-metadata.json +2022-07-30 12:45:09,798 INFO Thread-13 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/35hrcegs-diff.patch +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/diff.patch +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code +2022-07-30 12:45:10,017 INFO Thread-12 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/1gn9s8a2-code/run_flax_speech_recognition_ctc.py +2022-07-30 12:45:11,853 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:13,853 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:15,854 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:17,855 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:23,858 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:24,476 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:24,476 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:25,859 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:37,287 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:45:39,613 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:39,613 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:39,866 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:41,867 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:47,870 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:49,871 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:51,872 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:54,838 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:54,838 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:04,877 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:06,878 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:07,361 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:46:09,970 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:09,970 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:25,129 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:25,130 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:37,434 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:46:40,325 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:40,325 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:44,894 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:46,895 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:48,896 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:50,896 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:52,897 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:54,898 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:55,470 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:55,470 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:56,899 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:59,900 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log 
+2022-07-30 12:47:01,902 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:03,902 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:05,903 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:07,507 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:47:07,904 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:09,905 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:10,659 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:10,659 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:11,906 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:13,907 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:15,908 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:17,909 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log 
+2022-07-30 12:47:19,910 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:21,911 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:23,911 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:25,837 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:25,837 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:25,912 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:27,913 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:29,914 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:31,915 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:33,917 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:35,922 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:37,575 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats 
+2022-07-30 12:47:37,923 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:39,924 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:40,983 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:40,983 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:41,925 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:43,926 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:45,927 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:47,928 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:49,929 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:51,930 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:53,932 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:55,934 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:56,129 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:56,129 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:57,935 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:59,936 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:01,937 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:03,938 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:05,939 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:07,656 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:48:07,942 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:09,943 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:11,275 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:11,275 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:11,944 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:13,945 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:15,946 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:17,947 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:19,948 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:21,949 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:23,950 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:25,951 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:26,411 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:26,411 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:27,953 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:29,954 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:31,959 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:33,958 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:35,959 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:37,746 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:48:37,960 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:39,961 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:41,594 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:41,595 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:41,962 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:43,963 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:45,966 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:47,967 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:49,968 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:51,969 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:53,970 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:55,971 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:56,741 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:56,741 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:57,972 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:59,973 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:01,975 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:03,975 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:05,977 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:49:07,828 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:49:07,978 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:09,978 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:11,901 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:11,901 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:11,980 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:13,984 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:16,985 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:18,986 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:20,987 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:22,988 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:24,989 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:49:26,990 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:27,047 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:27,047 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:28,993 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:30,994 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:32,995 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:34,996 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:36,997 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:37,907 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:49:38,998 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:40,999 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:42,190 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:42,190 DEBUG SenderThread:3213310 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 12:49:43,000 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:45,001 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:47,002 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:49,003 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:51,004 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:53,005 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:55,005 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:57,007 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:57,360 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:57,360 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:59,007 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:01,008 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:03,010 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:05,011 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:07,012 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:07,985 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:50:09,012 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:11,013 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:12,517 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:12,517 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:13,014 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:15,015 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:17,016 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:19,017 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:21,018 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:23,019 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:25,020 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:27,021 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:27,672 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:27,673 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:29,022 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:31,023 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:33,024 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:35,025 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:37,026 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:38,058 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:50:39,028 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:41,029 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:42,815 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:42,815 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:43,029 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:45,030 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:47,031 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:49,033 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:51,033 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:53,034 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:55,035 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:57,037 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:57,998 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:57,999 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:59,038 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:01,039 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:03,040 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:05,041 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:07,042 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:08,133 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:51:09,043 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:11,044 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:13,045 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:13,144 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:13,144 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:28,283 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:28,283 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:38,215 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:51:43,434 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:43,434 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:58,575 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:58,575 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:05,067 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:07,068 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:08,288 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:52:09,069 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:11,070 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:13,071 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:13,716 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:13,716 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:15,072 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:28,860 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:28,861 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:38,358 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:52:44,095 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:44,095 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:47,086 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:49,087 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:51,088 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:53,089 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:55,089 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:57,090 INFO 
Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:59,091 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:59,239 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:59,239 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:01,092 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:03,093 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:05,094 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:07,095 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:08,436 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:53:09,096 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:11,097 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:13,098 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:14,376 DEBUG 
HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:14,377 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:15,098 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:23,102 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:25,103 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:27,104 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:29,105 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:29,516 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:29,517 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:31,106 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:38,508 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:53:44,655 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:44,655 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:59,827 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:59,828 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:04,119 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:06,120 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:08,121 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:08,586 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:54:10,123 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:12,124 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:14,127 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:15,107 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:15,107 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:16,128 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:18,129 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:20,130 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:22,131 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:24,132 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:26,133 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:28,134 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:30,135 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:30,262 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:30,262 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:32,136 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:34,137 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:36,138 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:38,139 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:54:38,659 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:54:40,140 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:42,141 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:44,142 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:45,396 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:45,396 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:46,143 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:48,144 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:50,145 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:52,147 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:54,147 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:56,148 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:54:58,149 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:00,150 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:00,598 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:00,598 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:02,151 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:04,152 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:06,153 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:08,154 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:08,768 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:55:10,155 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:12,156 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:14,157 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:55:15,738 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:15,738 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:16,158 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:18,159 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:20,160 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:22,161 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:24,163 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:30,876 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:30,876 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:38,848 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:55:46,017 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:46,018 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:01,241 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:01,241 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:04,179 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:08,926 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:56:09,181 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:15,184 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:16,404 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:16,405 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:19,186 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:25,188 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:29,190 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:31,191 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:31,542 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:31,542 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:35,193 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:37,194 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:38,998 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:56:40,195 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:46,948 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:46,949 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:48,199 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:02,212 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:02,213 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:09,068 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:57:17,413 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:17,413 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:21,212 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:30,215 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:32,216 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:32,586 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:32,586 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:39,135 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:57:47,750 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:47,750 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:02,898 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:02,899 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:09,202 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:58:18,035 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:18,035 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:33,178 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:33,179 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:39,274 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:58:48,322 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:48,323 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:03,460 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:03,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:09,352 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:59:18,648 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
12:59:18,648 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:33,784 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:33,784 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:39,428 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:59:48,918 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:48,918 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:04,064 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:04,065 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:09,507 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:00:19,201 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:19,201 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:34,362 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:34,362 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:39,578 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:00:49,498 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:49,498 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:04,632 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:04,632 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:06,305 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:01:09,652 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:01:19,785 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:19,786 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:35,613 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:35,614 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:39,720 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:01:50,783 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:50,784 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:05,947 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:05,948 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:09,790 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:02:21,092 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:21,093 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:36,227 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:36,227 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:39,851 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:02:51,379 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:51,379 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:06,513 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:06,513 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:09,911 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:03:21,647 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:21,648 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:36,784 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:36,785 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:39,971 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:03:51,917 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:51,918 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:07,054 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:07,054 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:10,032 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:04:22,188 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:22,189 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:37,331 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:37,331 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:40,103 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:04:52,469 
DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:52,470 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:07,622 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:07,622 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:10,260 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:05:22,759 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:22,759 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:37,898 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:37,898 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:40,392 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:05:53,030 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:53,030 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:59,441 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:06:08,183 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:08,183 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:06:10,469 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:06:23,365 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:23,366 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 13:06:38,541 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:38,541 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:06:40,541 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:06:53,708 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:53,708 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:08,856 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:08,857 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:10,613 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:07:23,993 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:23,994 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:39,131 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:39,132 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:40,685 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:07:54,269 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:54,269 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:09,404 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:09,404 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:10,758 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:08:24,535 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:24,535 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:39,691 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:39,692 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:40,832 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:08:54,829 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:54,830 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:09,962 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:09,962 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:10,906 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:09:25,099 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:25,100 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:40,232 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:40,232 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:40,985 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:09:55,370 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:55,370 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:10,509 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:10,510 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 13:10:11,110 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:10:25,644 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:25,645 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:38,564 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:10:40,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:40,914 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:41,186 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:10:56,126 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:56,126 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:11,252 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:11:12,387 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:12,388 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:27,560 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:27,560 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:41,317 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:11:42,716 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:42,717 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:57,856 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 13:11:57,856 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:11,384 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:12:12,991 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:12,991 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:28,125 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:28,126 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:41,447 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:12:43,268 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:43,268 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:58,401 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:58,401 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:11,511 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:13:13,539 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:13,540 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:28,676 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:28,676 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:41,574 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:13:43,810 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:43,810 DEBUG SenderThread:3213310 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 13:13:58,947 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:58,947 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:11,645 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:14:14,081 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:14,081 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:29,223 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:29,224 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:41,758 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:14:44,371 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:44,372 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:59,512 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:59,512 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:11,833 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:15:13,678 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:14,723 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:14,724 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:21,681 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:29,685 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:29,989 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:29,989 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:35,687 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:41,690 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:41,906 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:15:45,300 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:45,300 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:00,579 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:00,579 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:11,977 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:16:15,736 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:15,736 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:30,900 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:30,901 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:42,049 DEBUG SenderThread:3213310 
[sender.py:send():234] send: stats +2022-07-30 13:16:46,055 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:46,056 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:01,192 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:01,192 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:12,124 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:17:16,331 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:16,331 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:31,491 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:31,491 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:42,202 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:17:46,625 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:46,626 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:01,759 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:01,760 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:12,279 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:18:16,894 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:16,895 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:32,031 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
13:18:32,032 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:42,353 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:18:47,165 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:47,165 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:02,299 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:02,300 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:12,434 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:19:17,435 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:17,435 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:32,568 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:32,569 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:42,522 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:19:47,713 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:47,713 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:02,846 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:02,846 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:07,800 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:12,601 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 
13:20:14,802 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:17,994 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:17,995 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:20,805 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:26,807 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:32,810 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:33,202 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:33,202 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:36,812 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:42,678 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:20:42,814 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:48,480 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:48,480 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:48,816 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:52,818 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:57,820 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:21:04,520 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:04,520 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:12,749 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:21:19,684 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:19,684 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:35,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:35,447 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:42,819 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:21:50,612 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:50,612 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:05,781 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:05,781 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:12,892 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:22:20,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:20,914 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:36,115 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:36,115 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:42,967 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:22:51,252 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:51,252 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:06,386 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:06,387 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:13,048 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:23:21,529 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:21,529 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:36,663 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:36,663 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:43,122 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:23:51,796 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:51,797 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:06,930 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:06,930 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:13,195 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats 
+2022-07-30 13:24:22,074 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:22,075 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:37,210 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:37,211 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:43,275 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:24:52,347 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:52,347 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:07,485 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:07,485 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:13,345 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:25:22,735 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:22,736 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:36,933 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:37,929 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:37,930 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:42,936 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:43,420 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 
13:25:46,937 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:50,939 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:53,203 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:53,203 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:55,941 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:59,943 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:03,944 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:07,946 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:08,456 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:08,457 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:11,948 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:13,488 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:26:15,950 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:17,951 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:21,953 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:23,680 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:23,680 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:25,955 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:29,957 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:38,861 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:38,861 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:43,562 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:26:54,036 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:54,037 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:09,197 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:09,197 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:13,633 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:27:24,357 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 13:27:24,357 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:39,503 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:39,504 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:43,705 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:27:54,636 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:54,636 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:09,771 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:09,771 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:13,775 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:28:24,903 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:24,903 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:40,037 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:40,037 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:43,847 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:28:55,172 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:55,172 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:10,312 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:10,312 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:13,913 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:29:25,452 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:25,452 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:40,583 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:40,583 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:43,983 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:29:55,726 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:55,727 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:10,863 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:10,863 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:14,064 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:30:25,994 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:25,994 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:41,136 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:41,136 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:44,160 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:30:56,075 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:30:56,364 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:56,364 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:00,076 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:02,077 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:04,078 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:08,080 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:10,081 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:11,604 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:11,604 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:12,082 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:14,083 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:14,232 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:31:16,084 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:18,084 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:20,085 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:22,086 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:24,087 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:26,088 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:26,782 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:26,782 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:41,945 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:41,945 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:44,299 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:31:57,117 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:57,118 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:12,306 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:12,307 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:14,371 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:32:27,461 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 13:32:27,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:42,600 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:42,600 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:44,438 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:32:57,733 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:57,733 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:12,866 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:12,867 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:14,502 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:33:28,002 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:28,002 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:43,160 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:43,160 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:44,567 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:33:58,360 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:58,360 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:13,502 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:13,502 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:14,630 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:34:28,643 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:28,644 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:43,773 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:43,774 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:44,700 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:34:58,909 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:58,909 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:14,047 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:14,048 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:14,771 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:35:29,206 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:29,207 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:44,363 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:44,364 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:44,843 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:35:46,204 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:48,205 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:50,206 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:59,530 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:59,531 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:04,211 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:14,672 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:14,672 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:14,917 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:36:18,216 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:28,220 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:29,880 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:29,881 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:36,223 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:44,992 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:36:45,187 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:45,187 
DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:45,226 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:53,229 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:59,232 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:00,576 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:00,577 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:05,234 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:13,237 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:15,058 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:37:16,030 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:16,030 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:19,240 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:26,242 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:30,244 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:31,288 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:31,288 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:36,246 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:42,248 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:45,121 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:37:46,250 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:46,486 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:46,486 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:52,252 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:56,254 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:01,802 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:01,802 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:03,257 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:07,258 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:13,260 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:15,183 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:38:17,054 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:17,054 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:17,262 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:21,264 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:25,265 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:31,268 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:32,270 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:32,271 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:35,269 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:39,271 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:43,272 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:45,251 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:38:45,273 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:47,491 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:47,492 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:49,275 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:54,277 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:58,278 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:02,280 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:02,659 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:02,660 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:08,282 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:39:12,284 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:14,284 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:15,317 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:39:17,882 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:17,882 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:18,286 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:20,287 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:22,288 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:26,289 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:28,290 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:30,291 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:32,292 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:39:33,072 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:33,072 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:34,293 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:36,294 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:38,295 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:40,295 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:42,296 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:44,297 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:45,389 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:39:46,298 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:48,375 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:48,376 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:59,303 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:03,766 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:03,766 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:07,306 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:15,309 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:15,462 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:40:18,999 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:18,999 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:23,312 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:31,315 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:34,399 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:34,399 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:38,318 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:44,320 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:45,535 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:40:49,639 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:49,639 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:52,324 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:58,326 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:04,329 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:04,858 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:04,858 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:08,330 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:14,332 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:15,607 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:41:20,174 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:20,175 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:21,335 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:27,337 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:31,339 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:35,474 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:35,474 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:37,341 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:41,343 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:45,680 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:41:47,345 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:50,752 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:50,753 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:51,347 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:55,348 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:59,350 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:42:05,986 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:05,987 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:06,353 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:10,354 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:14,356 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:15,754 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:42:18,358 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:21,265 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:21,265 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:22,359 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:26,361 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:28,362 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:32,363 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:36,365 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:36,509 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:36,510 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:40,367 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:42,368 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:45,826 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:42:47,370 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:49,370 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:51,762 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:51,762 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:53,372 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:55,373 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:57,374 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:59,375 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:03,377 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:05,378 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:06,968 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:43:06,969 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:43:07,378 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:09,379 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:13,381 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:15,382 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:15,898 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:43:17,383 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:21,385 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:22,146 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:43:22,146 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:43:23,386 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:25,387 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:27,387 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:37,016 DEBUG SenderThread:3213310 [sender.py:send():234] send: history +2022-07-30 13:43:37,020 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 13:43:37,025 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:37,391 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:38,392 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:39,441 DEBUG SenderThread:3213310 [sender.py:send():234] send: telemetry +2022-07-30 13:43:39,442 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:39,442 DEBUG SenderThread:3213310 [sender.py:send():234] send: exit +2022-07-30 13:43:39,442 INFO SenderThread:3213310 [sender.py:send_exit():366] handling exit code: 
1 +2022-07-30 13:43:39,443 INFO SenderThread:3213310 [sender.py:send_exit():368] handling runtime: 3512 +2022-07-30 13:43:39,446 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:39,446 INFO SenderThread:3213310 [sender.py:send_exit():374] send defer +2022-07-30 13:43:39,446 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:39,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,447 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 13:43:39,447 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,447 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 13:43:39,447 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 1 +2022-07-30 13:43:39,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,447 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 13:43:39,454 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,454 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 13:43:39,454 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 2 +2022-07-30 13:43:39,454 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:43:39,455 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,455 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 13:43:39,455 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,455 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender 
defer: 2 +2022-07-30 13:43:39,455 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 3 +2022-07-30 13:43:39,455 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,455 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 13:43:39,457 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:39,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 4 +2022-07-30 13:43:39,461 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,461 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 13:43:39,462 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,462 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 13:43:39,548 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:39,677 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 5 +2022-07-30 13:43:39,677 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:39,677 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,678 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 13:43:39,678 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,678 INFO SenderThread:3213310 
[sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 13:43:39,678 INFO SenderThread:3213310 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 13:43:39,779 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml config.yaml +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/diff.patch diff.patch +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt requirements.txt +2022-07-30 13:43:40,397 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log output.log +2022-07-30 13:43:40,397 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json wandb-summary.json +2022-07-30 13:43:40,398 INFO 
SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json wandb-metadata.json +2022-07-30 13:43:40,400 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 13:43:40,401 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 6 +2022-07-30 13:43:40,401 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,407 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:40,407 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 13:43:40,407 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:40,408 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 13:43:40,408 INFO SenderThread:3213310 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 13:43:40,506 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,506 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,608 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,608 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,710 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,710 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,812 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,812 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,877 INFO Thread-17 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:40,883 INFO Thread-14 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml +2022-07-30 13:43:40,898 INFO Thread-15 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt +2022-07-30 13:43:40,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,914 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,016 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,016 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,118 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,118 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,139 INFO Thread-16 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:41,220 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,220 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,322 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,322 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,339 INFO Thread-7 :3213310 [sender.py:transition_state():387] send defer: 7 +2022-07-30 13:43:41,340 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:41,340 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 13:43:41,340 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:41,340 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 13:43:41,424 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:42,442 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 8 +2022-07-30 13:43:42,442 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:42,443 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:42,443 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 13:43:42,443 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:42,443 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 13:43:42,443 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 9 +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send():234] send: final +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send():234] send: footer +2022-07-30 13:43:42,444 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:42,444 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:42,444 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 13:43:42,544 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:42,544 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:42,544 INFO SenderThread:3213310 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 13:43:42,809 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 13:43:42,814 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 13:43:42,815 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 13:43:42,815 INFO HandlerThread:3213310 [handler.py:finish():731] shutting down handler +2022-07-30 13:43:43,445 INFO WriterThread:3213310 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [sender.py:finish():1070] shutting down sender +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 13:43:43,856 INFO MainThread:3213310 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_124505-101ubxa3/logs/debug.log b/wandb/run-20220730_124505-101ubxa3/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3889692f0a3d75f04ba41391256d56f540066bac --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/logs/debug.log @@ -0,0 +1,159 @@ +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/logs/debug.log +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_init.py:_log_setup():372] 
Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():404] calling init triggers +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():460] starting backend +2022-07-30 12:45:05,742 INFO MainThread:3212038 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 12:45:05,785 INFO MainThread:3212038 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 12:45:05,826 INFO MainThread:3212038 [backend.py:ensure_launched():221] started backend process with pid: 3213310 +2022-07-30 12:45:05,828 INFO MainThread:3212038 [wandb_init.py:init():469] backend started and connected +2022-07-30 12:45:05,841 INFO MainThread:3212038 [wandb_init.py:init():533] updated telemetry +2022-07-30 12:45:05,942 INFO MainThread:3212038 [wandb_init.py:init():563] communicating current version +2022-07-30 12:45:06,656 INFO MainThread:3212038 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 12:45:06,656 INFO MainThread:3212038 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 12:45:06,845 INFO MainThread:3212038 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 12:45:09,337 INFO MainThread:3212038 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 12:45:09,338 INFO MainThread:3212038 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 12:45:09,338 INFO MainThread:3212038 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 12:45:09,340 INFO MainThread:3212038 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 12:45:09,340 INFO MainThread:3212038 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 13:43:36,388 INFO MainThread:3212038 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 13:43:37,009 INFO MainThread:3212038 [wandb_run.py:_restore():1752] restore +2022-07-30 13:43:39,447 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 13:43:39,678 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 13:43:40,405 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 417315 +} + +2022-07-30 13:43:40,507 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 417315 +} + +2022-07-30 13:43:40,609 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,711 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,813 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,915 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,017 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,119 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,221 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,323 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:42,443 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:42,808 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} +local_info { +} + +2022-07-30 13:43:44,539 INFO MainThread:3212038 [wandb_run.py:_append_history():2130] rendering history +2022-07-30 13:43:44,540 INFO MainThread:3212038 [wandb_run.py:_append_summary():2085] rendering summary +2022-07-30 13:43:44,540 INFO MainThread:3212038 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb b/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..9e9bbb994f94d21243c2bc8b59bbd49f15caa492 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb5e98d359963f8ef635f536561c6dd8813a3353f09776016ed802720a46201 +size 577536 diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
@flax.struct.dataclass
class FlaxDataCollatorSpeechSeq2SeqWithPadding:
    """
    Data collator that dynamically pads audio inputs and tokenised labels.

    Args:
        processor ([`Wav2Vec2Processor`]):
            The processor used for processing the data.
        input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"longest"`):
            Padding strategy for the returned input sequences (``True``/``'longest'``,
            ``'max_length'``, or ``False``/``'do_not_pad'``).
        label_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"max_length"`):
            Padding strategy for the returned label sequences (same options as above).
        pad_input_to_multiple_of (:obj:`int`, `optional`):
            If set, pad the input sequence length to a multiple of this value
            (useful for Tensor Cores on NVIDIA hardware with compute capability >= 7.5).
        pad_to_multiple_of_label (:obj:`int`, `optional`):
            If set, pad the label sequence length to a multiple of this value.
        max_input_length (:obj:`float`, `optional`):
            Maximum length of the returned ``input_values``.
        max_label_length (:obj:`float`, `optional`):
            Maximum length of the returned label ids.
    """

    processor: Any
    input_padding: Union[bool, str] = "longest"
    label_padding: Union[bool, str] = "max_length"
    pad_input_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_label: Optional[int] = None
    max_input_length: Optional[float] = None
    max_label_length: Optional[float] = None

    def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
        # Inputs and labels have different lengths and need different padding
        # methods, so they are split apart and padded separately.
        audio_features = [{"input_values": feature["input_values"]} for feature in features]
        text_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.feature_extractor.pad(
            audio_features,
            max_length=self.max_input_length,
            padding=self.input_padding,
            pad_to_multiple_of=self.pad_input_to_multiple_of,
            return_tensors="np",
        )

        padded_labels = self.processor.tokenizer.pad(
            text_features,
            max_length=self.max_label_length,
            padding=self.label_padding,
            pad_to_multiple_of=self.pad_to_multiple_of_label,
            return_tensors="np",
        )

        # Replace padding positions with -100 so the loss ignores them.
        label_ids = padded_labels["input_ids"]
        masked = np.ma.array(label_ids, mask=np.not_equal(padded_labels.attention_mask, 1))
        batch["labels"] = masked.filled(fill_value=-100)

        return batch
def get_grouped_indices(
    dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
) -> np.array:
    """
    Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file
    (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)

    Returns a list of indices in which each slice of `batch_size` consecutive indices
    corresponds to elements of similar lengths. The indices are:

        - randomly permuted (if a JAX rng is specified)
        - grouped in mega-batches of size `mega_batch_mult * batch_size`
        - sorted by length (descending) within each mega-batch

    The result is the concatenation of all mega-batches, with the mega-batch containing
    the overall longest element moved to the front so an OOM surfaces sooner rather than later.
    """
    lengths = dataset["input_length"]

    if mega_batch_mult is None:
        # Default: 50, or enough to form ~4 mega-batches, whichever is smaller.
        mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
        if mega_batch_mult == 0:
            # Guard for tiny datasets.
            mega_batch_mult = 1

    num_samples = len(lengths)
    if rng is not None:
        # Use JAX for the permutation so the PRNG key seeded outside the sampler controls it.
        indices = jax.random.permutation(rng, np.arange(num_samples))
    else:
        indices = np.arange(num_samples)

    megabatch_size = mega_batch_mult * batch_size
    chunks = [indices[start : start + megabatch_size].tolist() for start in range(0, num_samples, megabatch_size)]
    chunks = [sorted(chunk, key=lambda idx: lengths[idx], reverse=True) for chunk in chunks]

    # Each chunk is sorted descending, so its first element is its longest.
    chunk_maxima = [lengths[chunk[0]] for chunk in chunks]
    longest = np.argmax(chunk_maxima).item()
    # Swap the whole mega-batch holding the longest element to the front
    # (unlike the PT grouped sampler, which only moves the single longest element).
    chunks[0], chunks[longest] = chunks[longest], chunks[0]

    return np.array([idx for chunk in chunks for idx in chunk])


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Slice `samples_idx` into batches of `batch_size`.

    If the dataset size is not divisible by the batch size and `drop_last` is `True`,
    the trailing incomplete batch is dropped; otherwise it is kept as a shorter final batch.
    """
    num_samples = len(samples_idx)
    if drop_last:
        remainder = num_samples % batch_size
        if remainder != 0:
            samples_idx = samples_idx[:-remainder]
        return samples_idx.reshape((num_samples // batch_size, batch_size))
    return np.array_split(samples_idx, math.ceil(num_samples / batch_size))


def write_train_metric(summary_writer, train_metrics, train_time, step):
    """Log accumulated training metrics (and wall-clock train time) to TensorBoard."""
    summary_writer.scalar("train_time", train_time, step)

    stacked = get_metrics(train_metrics)
    for name, values in stacked.items():
        # Back-fill one scalar per accumulated step so the x-axis stays aligned.
        for offset, value in enumerate(values):
            summary_writer.scalar(f"train_{name}", value, step - len(values) + offset + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    """Log evaluation metrics (and optionally predicted strings) to TensorBoard."""
    for name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{name}", value, step)

    if pred_str is not None:
        # Also dump the actual predictions for debugging.
        summary_writer.text("eval_predictions", "\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a metrics dict to Weights & Biases from the primary host only."""
    if jax.process_index() != 0:
        return
    payload = {}
    for key, value in metrics.items():
        if "layer" in key:
            # Trailing slash groups per-layer metrics into their own W&B section.
            payload[f"{key}/"] = value
        elif prefix is not None:
            payload[f"{prefix}/{key}"] = value
        else:
            payload[key] = value
    wandb.log(payload, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) pairs as a W&B table."""
    if jax.process_index() != 0:
        return
    # Convert the string data to a wandb-compatible row format.
    rows = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
    wandb.log(
        {
            f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                columns=["label_str", "pred_str"], data=rows[:num_log]
            )
        },
        step,
    )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Linear warmup from 0 to `learning_rate` over `num_warmup_steps`, then linear decay to 0."""
    warmup = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    return optax.join_schedules(schedules=[warmup, decay], boundaries=[num_warmup_steps])
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
def make_dataset(data_args, seed=42):
    """Build the combined NST + NPSC dataset dict (train/validation/test splits)."""
    # Pre-processing dataset
    import re

    def map_nst(entry):
        # Lower-case and normalise accented characters to the Norwegian alphabet.
        text = entry["text"].lower()
        text = text.replace("(...vær stille under dette opptaket...)", "")
        text = re.sub('[áàâ]', 'a', text)
        text = re.sub('[ä]', 'æ', text)
        text = re.sub('[éèëê]', 'e', text)
        text = re.sub('[íìïî]', 'i', text)
        text = re.sub('[óòöô]', 'o', text)
        # NOTE(review): 'ö' is already consumed by the [óòöô] rule above, so this
        # substitution can never fire — confirm which mapping was intended.
        text = re.sub('[ö]', 'ø', text)
        text = re.sub('[ç]', 'c', text)
        text = re.sub('[úùüû]', 'u', text)
        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
        text = re.sub('\s+', ' ', text)
        return {"text": text}

    def filter_nst(entry):
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # NOTE(review): argument order looks swapped for re.match(pattern, string);
        # presumably re.match("pIW|CA", entry["type"]) was intended — confirm.
        if re.match(entry["type"], "pIW|CA"):
            return False  # Spelling out words
        return True

    def filter_npsc(entry):
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # Reject utterances containing digits.
        if re.search("\d", entry["text"]):
            return False
        return True

    def map_npsc(entry):
        batch = {"text": entry["text"].lower()}
        batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
        batch["text"] = re.sub('[ä]', 'æ', batch["text"])
        batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
        batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
        batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
        batch["text"] = re.sub('[ö]', 'ø', batch["text"])
        batch["text"] = re.sub('[ç]', 'c', batch["text"])
        batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
        batch["text"] = re.sub('\s', ' ', batch["text"])
        # NOTE(review): the four empty patterns below look like angle-bracket
        # hesitation tokens (e.g. "<ee>") that were stripped from the file; as
        # written, an empty pattern inserts the replacement between every
        # character — confirm the original token literals.
        batch["text"] = re.sub('', 'eee', batch["text"])
        batch["text"] = re.sub('', 'qqq', batch["text"])
        batch["text"] = re.sub('', 'mmm', batch["text"])
        batch["text"] = re.sub('', 'xxx', batch["text"])
        # batch["text"] = re.sub('', '?', batch["text"])
        if "<" in batch["text"]:
            raise ValueError(batch["text"])
        return batch

    nst = datasets.load_dataset("NbAiLab/NST", "no-close")
    npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
    # TODO NST_hesitate

    # Use the same train/val ratio as NPSC to split NST's train set.
    split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"]))
    nst_train = nst["train"].train_test_split(train_size=split, seed=seed)
    nst["train"] = nst_train["train"]
    nst["validation"] = nst_train["test"]

    nst = nst.filter(filter_nst).map(
        map_nst,
        num_proc=data_args.preprocessing_num_workers,
        desc="filtering NST",
    ).shuffle(seed=seed)
    npsc = npsc.filter(filter_npsc).map(
        map_npsc,
        num_proc=data_args.preprocessing_num_workers,
        desc="filtering NPSC",
    ).shuffle(seed=seed)

    # Keep only the columns shared by both corpora.
    npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]])
    nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]])

    combined = {}
    for split in "train", "validation", "test":
        # Interleave the corpora, weighting by their number of examples.
        probs = np.array([len(nst_base[split]), len(npsc_base[split])])
        probs = (probs / probs.sum()).tolist()
        comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
        combined[split] = comb

    return datasets.DatasetDict(**combined)


def main():
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # 2. Setup logging
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    # Set the verbosity to info of the Transformers logger.
    # We only want one process per machine to log things on the screen.
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml b/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d087d516b40328c4f00700dc5c86a4eb1aa22196 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659189364 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch b/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/output.log b/wandb/run-20220730_135604-y1b5rbiq/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7b40cae250aeb9a966336aa92b84cadcc597bbaa --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/output.log @@ -0,0 +1,2165 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_13-55-59_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 48.48it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
301.86it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9016.64ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8987.54ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8774.46ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8781.68ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7529.19ex/s] +removing punctuation 
from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8682.85ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8696.66ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8419.58ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7780.02ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8423.60ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8314.01ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8360.27ex/s] +removing punctuation from train 
split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8388.07ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8076.60ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8385.87ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8247.25ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8282.62ex/s] +removing punctuation from train split #9: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8561/9523 [00:01<00:00, 8038.51ex/s] +removing punctuation from train split #10: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8631/9523 [00:01<00:00, 8237.86ex/s] +removing punctuation from train split #9: 
99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9410/9523 [00:01<00:00, 8168.88ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7719/9522 [00:00<00:00, 8021.73ex/s] +removing punctuation from train split #12: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8581/9522 [00:01<00:00, 8196.22ex/s] +removing punctuation from train split #11: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9412/9523 [00:01<00:00, 8255.67ex/s] +removing punctuation from train split #13: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7342/9522 [00:00<00:00, 7892.60ex/s] +removing punctuation from train split #12: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9434/9522 [00:01<00:00, 8293.39ex/s] +removing punctuation from train split #13: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8193/9522 [00:01<00:00, 8072.17ex/s] +removing punctuation from train split #13: 
95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9047/9522 [00:01<00:00, 8208.50ex/s] +removing punctuation from train split #14: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8565/9522 [00:01<00:00, 8252.60ex/s] +removing punctuation from train split #14: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9434/9522 [00:01<00:00, 8381.45ex/s] +removing punctuation from train split #16: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8460/9522 [00:01<00:00, 8176.76ex/s] +removing punctuation from train split #17: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7594/9522 [00:00<00:00, 8040.57ex/s] +removing punctuation from train split #18: 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6741/9522 [00:00<00:00, 8561.35ex/s] +removing punctuation from train split #16: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9301/9522 [00:01<00:00, 8245.09ex/s] +removing punctuation from train split #20: 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5874/9522 [00:00<00:00, 
8506.59ex/s] +removing punctuation from train split #17: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9322/9522 [00:01<00:00, 8341.78ex/s] +removing punctuation from train split #18: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8468/9522 [00:01<00:00, 8226.73ex/s] +removing punctuation from train split #19: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7480/9522 [00:00<00:00, 8193.61ex/s] +removing punctuation from train split #22: 52%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4987/9522 [00:00<00:00, 8517.27ex/s] +removing punctuation from train split #21: 70%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6707/9522 [00:00<00:00, 8578.07ex/s] +removing punctuation from train split #22: 61%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5839/9522 [00:00<00:00, 8351.90ex/s] +removing punctuation from train split #23: 47%|████████████████████████████████████████████████████████████████████████████████████████████▉ | 4513/9522 [00:00<00:00, 7286.27ex/s] +removing punctuation from train split #24: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5016/9522 [00:00<00:00, 8458.83ex/s] +removing punctuation from train split #27: 26%|██████████████████████████████████████████████████▉ | 2476/9522 [00:00<00:00, 8333.80ex/s] +removing punctuation from train split #26: 
42%|█████████████████████████████████████████████████████████████████████████████████▌ | 3960/9522 [00:00<00:00, 8242.89ex/s] +removing punctuation from train split #27: 35%|████████████████████████████████████████████████████████████████████▋ | 3339/9522 [00:00<00:00, 8449.30ex/s] +removing punctuation from train split #28: 25%|████████████████████████████████████████████████▌ | 2361/9522 [00:00<00:00, 8018.54ex/s] +removing punctuation from train split #29: 26%|██████████████████████████████████████████████████▍ | 2449/9522 [00:00<00:00, 8231.06ex/s] +removing punctuation from train split #25: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8523/9522 [00:01<00:00, 7439.03ex/s] +removing punctuation from train split #26: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8361/9522 [00:01<00:00, 7965.30ex/s] +removing punctuation from train split #27: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7019/9522 [00:00<00:00, 9069.67ex/s] +removing punctuation from train split #25: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9443/9522 [00:01<00:00, 7922.19ex/s] +removing punctuation from train split #27: 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7979/9522 [00:00<00:00, 9233.51ex/s] +removing punctuation from train split #26: 
97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9277/9522 [00:01<00:00, 8308.04ex/s] +removing punctuation from train split #27: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8948/9522 [00:01<00:00, 9372.69ex/s] +removing punctuation from train split #28: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7663/9522 [00:00<00:00, 8311.71ex/s] +removing punctuation from train split #30: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6963/9522 [00:00<00:00, 8726.86ex/s] +removing punctuation from train split #28: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8597/9522 [00:01<00:00, 8617.07ex/s] +removing punctuation from train split #29: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8736/9522 [00:01<00:00, 8466.25ex/s] +removing punctuation from train split #30: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8813/9522 [00:01<00:00, 8988.36ex/s] +removing punctuation from train split #31: 
84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8046/9522 [00:00<00:00, 8800.51ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00020_of_00032.arrow8945/9522 [00:01<00:00, 8834.57ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%| | 0/9497 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 14:07:45.463224: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 14:07:45.463276: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. 
Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1541, in main + run_evaluation(cur_step) + File "run_flax_speech_recognition_ctc.py", line 1442, in run_evaluation + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 50, in __getitem__ + return super().__getitem__(k) +KeyError: 'eval' \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt b/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 
+cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..77413b8e42c77ac3c30f847303c3c3fc8c1f2e9f --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T13:56:07.788166", + "startedAt": "2022-07-30T13:56:04.284610", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=12", + "--per_device_eval_batch_size=12", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4f4b362e2c3616387913e1c94c73c52192f52605 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/grad_norm": 11.5, "layer_grad_norm/": {"lm_head": {"bias": 0.099609375, "kernel": 1.8125}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.0751953125, "scale": 0.06640625}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.000339508056640625, "kernel": 0.2158203125}, "out_proj": {"bias": 0.10205078125, "kernel": 0.9921875}, "q_proj": {"bias": 0.018798828125, "kernel": 0.2412109375}, "v_proj": {"bias": 0.08056640625, "kernel": 0.6875}}, "feed_forward": {"intermediate_dense": {"bias": 0.1142578125, "kernel": 1.5078125}, "output_dense": {"bias": 0.058349609375, "kernel": 1.296875}}, "final_layer_norm": {"bias": 0.27734375, "scale": 0.453125}, "layer_norm": {"bias": 0.1513671875, "scale": 0.34375}}, 
"1": {"attention": {"k_proj": {"bias": 0.00014495849609375, "kernel": 0.10107421875}, "out_proj": {"bias": 0.0673828125, "kernel": 0.7265625}, "q_proj": {"bias": 0.0091552734375, "kernel": 0.1064453125}, "v_proj": {"bias": 0.0947265625, "kernel": 0.609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.076171875, "kernel": 1.078125}, "output_dense": {"bias": 0.0625, "kernel": 0.953125}}, "final_layer_norm": {"bias": 0.134765625, "scale": 0.13671875}, "layer_norm": {"bias": 0.142578125, "scale": 0.1103515625}}, "10": {"attention": {"k_proj": {"bias": 8.392333984375e-05, "kernel": 0.228515625}, "out_proj": {"bias": 0.052978515625, "kernel": 0.578125}, "q_proj": {"bias": 0.014404296875, "kernel": 0.244140625}, "v_proj": {"bias": 0.078125, "kernel": 0.703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0732421875, "kernel": 1.078125}, "output_dense": {"bias": 0.051513671875, "kernel": 0.84375}}, "final_layer_norm": {"bias": 0.1201171875, "scale": 0.09033203125}, "layer_norm": {"bias": 0.140625, "scale": 0.0859375}}, "11": {"attention": {"k_proj": {"bias": 0.00010013580322265625, "kernel": 0.25}, "out_proj": {"bias": 0.0498046875, "kernel": 0.67578125}, "q_proj": {"bias": 0.015869140625, "kernel": 0.2451171875}, "v_proj": {"bias": 0.080078125, "kernel": 0.8125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.046875}, "output_dense": {"bias": 0.048828125, "kernel": 0.7890625}}, "final_layer_norm": {"bias": 0.10693359375, "scale": 0.087890625}, "layer_norm": {"bias": 0.138671875, "scale": 0.10546875}}, "12": {"attention": {"k_proj": {"bias": 8.630752563476562e-05, "kernel": 0.23828125}, "out_proj": {"bias": 0.04931640625, "kernel": 0.578125}, "q_proj": {"bias": 0.0146484375, "kernel": 0.23046875}, "v_proj": {"bias": 0.076171875, "kernel": 0.72265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0732421875, "kernel": 1.0546875}, "output_dense": {"bias": 0.047607421875, "kernel": 0.7734375}}, "final_layer_norm": {"bias": 
0.1201171875, "scale": 0.0986328125}, "layer_norm": {"bias": 0.11572265625, "scale": 0.0927734375}}, "13": {"attention": {"k_proj": {"bias": 0.00012683868408203125, "kernel": 0.275390625}, "out_proj": {"bias": 0.0498046875, "kernel": 0.66796875}, "q_proj": {"bias": 0.017578125, "kernel": 0.26953125}, "v_proj": {"bias": 0.08447265625, "kernel": 0.8671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.07275390625, "kernel": 1.03125}, "output_dense": {"bias": 0.0517578125, "kernel": 0.8125}}, "final_layer_norm": {"bias": 0.11962890625, "scale": 0.09716796875}, "layer_norm": {"bias": 0.1171875, "scale": 0.1142578125}}, "14": {"attention": {"k_proj": {"bias": 0.0002002716064453125, "kernel": 0.224609375}, "out_proj": {"bias": 0.0498046875, "kernel": 0.65625}, "q_proj": {"bias": 0.014404296875, "kernel": 0.224609375}, "v_proj": {"bias": 0.0791015625, "kernel": 0.81640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.07568359375, "kernel": 1.0703125}, "output_dense": {"bias": 0.05029296875, "kernel": 0.87109375}}, "final_layer_norm": {"bias": 0.1259765625, "scale": 0.1279296875}, "layer_norm": {"bias": 0.11376953125, "scale": 0.1005859375}}, "15": {"attention": {"k_proj": {"bias": 0.00020694732666015625, "kernel": 0.30078125}, "out_proj": {"bias": 0.050537109375, "kernel": 0.86328125}, "q_proj": {"bias": 0.018310546875, "kernel": 0.28125}, "v_proj": {"bias": 0.0830078125, "kernel": 0.875}}, "feed_forward": {"intermediate_dense": {"bias": 0.07080078125, "kernel": 0.96875}, "output_dense": {"bias": 0.052001953125, "kernel": 0.83203125}}, "final_layer_norm": {"bias": 0.11962890625, "scale": 0.130859375}, "layer_norm": {"bias": 0.119140625, "scale": 0.1123046875}}, "16": {"attention": {"k_proj": {"bias": 0.00014400482177734375, "kernel": 0.298828125}, "out_proj": {"bias": 0.05078125, "kernel": 0.6328125}, "q_proj": {"bias": 0.018310546875, "kernel": 0.296875}, "v_proj": {"bias": 0.0810546875, "kernel": 0.76953125}}, "feed_forward": {"intermediate_dense": {"bias": 
0.0693359375, "kernel": 0.98046875}, "output_dense": {"bias": 0.048828125, "kernel": 0.859375}}, "final_layer_norm": {"bias": 0.11279296875, "scale": 0.10546875}, "layer_norm": {"bias": 0.12109375, "scale": 0.189453125}}, "17": {"attention": {"k_proj": {"bias": 0.000141143798828125, "kernel": 0.275390625}, "out_proj": {"bias": 0.0546875, "kernel": 0.59765625}, "q_proj": {"bias": 0.017333984375, "kernel": 0.275390625}, "v_proj": {"bias": 0.083984375, "kernel": 0.734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.07421875, "kernel": 1.046875}, "output_dense": {"bias": 0.052978515625, "kernel": 0.8359375}}, "final_layer_norm": {"bias": 0.1201171875, "scale": 0.12060546875}, "layer_norm": {"bias": 0.126953125, "scale": 0.1328125}}, "18": {"attention": {"k_proj": {"bias": 0.000148773193359375, "kernel": 0.33203125}, "out_proj": {"bias": 0.04931640625, "kernel": 0.67578125}, "q_proj": {"bias": 0.01904296875, "kernel": 0.310546875}, "v_proj": {"bias": 0.0791015625, "kernel": 0.7265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.0078125}, "output_dense": {"bias": 0.044921875, "kernel": 0.84375}}, "final_layer_norm": {"bias": 0.10986328125, "scale": 0.0927734375}, "layer_norm": {"bias": 0.119140625, "scale": 0.11865234375}}, "19": {"attention": {"k_proj": {"bias": 0.00011110305786132812, "kernel": 0.2314453125}, "out_proj": {"bias": 0.046875, "kernel": 0.515625}, "q_proj": {"bias": 0.0145263671875, "kernel": 0.251953125}, "v_proj": {"bias": 0.0693359375, "kernel": 0.6171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0595703125, "kernel": 0.9453125}, "output_dense": {"bias": 0.0439453125, "kernel": 0.8203125}}, "final_layer_norm": {"bias": 0.0947265625, "scale": 0.0771484375}, "layer_norm": {"bias": 0.0986328125, "scale": 0.10302734375}}, "2": {"attention": {"k_proj": {"bias": 0.0001392364501953125, "kernel": 0.1484375}, "out_proj": {"bias": 0.07666015625, "kernel": 0.7578125}, "q_proj": {"bias": 0.01312255859375, "kernel": 
0.15234375}, "v_proj": {"bias": 0.1220703125, "kernel": 0.87109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.08544921875, "kernel": 1.3671875}, "output_dense": {"bias": 0.0703125, "kernel": 1.015625}}, "final_layer_norm": {"bias": 0.14453125, "scale": 0.1220703125}, "layer_norm": {"bias": 0.1611328125, "scale": 0.1435546875}}, "20": {"attention": {"k_proj": {"bias": 6.008148193359375e-05, "kernel": 0.1796875}, "out_proj": {"bias": 0.0478515625, "kernel": 0.357421875}, "q_proj": {"bias": 0.012451171875, "kernel": 0.2275390625}, "v_proj": {"bias": 0.0673828125, "kernel": 0.44921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.06103515625, "kernel": 1.0}, "output_dense": {"bias": 0.0458984375, "kernel": 0.8359375}}, "final_layer_norm": {"bias": 0.0966796875, "scale": 0.08544921875}, "layer_norm": {"bias": 0.0966796875, "scale": 0.0859375}}, "21": {"attention": {"k_proj": {"bias": 0.00010013580322265625, "kernel": 0.203125}, "out_proj": {"bias": 0.0478515625, "kernel": 0.515625}, "q_proj": {"bias": 0.01239013671875, "kernel": 0.228515625}, "v_proj": {"bias": 0.06689453125, "kernel": 0.58203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.060302734375, "kernel": 1.0}, "output_dense": {"bias": 0.047119140625, "kernel": 0.83203125}}, "final_layer_norm": {"bias": 0.095703125, "scale": 0.0859375}, "layer_norm": {"bias": 0.08935546875, "scale": 0.0849609375}}, "22": {"attention": {"k_proj": {"bias": 6.628036499023438e-05, "kernel": 0.20703125}, "out_proj": {"bias": 0.0517578125, "kernel": 0.43359375}, "q_proj": {"bias": 0.013671875, "kernel": 0.248046875}, "v_proj": {"bias": 0.07177734375, "kernel": 0.51953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.0625}, "output_dense": {"bias": 0.052978515625, "kernel": 0.828125}}, "final_layer_norm": {"bias": 0.1103515625, "scale": 0.123046875}, "layer_norm": {"bias": 0.1044921875, "scale": 0.08935546875}}, "23": {"attention": {"k_proj": {"bias": 0.000152587890625, "kernel": 
0.2734375}, "out_proj": {"bias": 0.05712890625, "kernel": 0.7421875}, "q_proj": {"bias": 0.0167236328125, "kernel": 0.283203125}, "v_proj": {"bias": 0.08447265625, "kernel": 0.8046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0693359375, "kernel": 1.0703125}, "output_dense": {"bias": 0.056640625, "kernel": 0.7890625}}, "final_layer_norm": {"bias": 0.1103515625, "scale": 0.12158203125}, "layer_norm": {"bias": 0.119140625, "scale": 0.150390625}}, "24": {"attention": {"k_proj": {"bias": 0.00010776519775390625, "kernel": 0.251953125}, "out_proj": {"bias": 0.04833984375, "kernel": 0.57421875}, "q_proj": {"bias": 0.0166015625, "kernel": 0.265625}, "v_proj": {"bias": 0.083984375, "kernel": 0.6875}}, "feed_forward": {"intermediate_dense": {"bias": 0.06201171875, "kernel": 0.96875}, "output_dense": {"bias": 0.045166015625, "kernel": 0.71875}}, "final_layer_norm": {"bias": 0.1015625, "scale": 0.10546875}, "layer_norm": {"bias": 0.138671875, "scale": 0.099609375}}, "25": {"attention": {"k_proj": {"bias": 0.00013446807861328125, "kernel": 0.228515625}, "out_proj": {"bias": 0.046630859375, "kernel": 0.5859375}, "q_proj": {"bias": 0.015869140625, "kernel": 0.240234375}, "v_proj": {"bias": 0.07421875, "kernel": 0.66796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.058837890625, "kernel": 0.9296875}, "output_dense": {"bias": 0.044921875, "kernel": 0.6796875}}, "final_layer_norm": {"bias": 0.1005859375, "scale": 0.126953125}, "layer_norm": {"bias": 0.111328125, "scale": 0.1376953125}}, "26": {"attention": {"k_proj": {"bias": 0.00011539459228515625, "kernel": 0.240234375}, "out_proj": {"bias": 0.04443359375, "kernel": 0.56640625}, "q_proj": {"bias": 0.0159912109375, "kernel": 0.265625}, "v_proj": {"bias": 0.07177734375, "kernel": 0.64453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.05810546875, "kernel": 0.859375}, "output_dense": {"bias": 0.044921875, "kernel": 0.6953125}}, "final_layer_norm": {"bias": 0.095703125, "scale": 0.0888671875}, 
"layer_norm": {"bias": 0.1005859375, "scale": 0.09375}}, "27": {"attention": {"k_proj": {"bias": 0.0001506805419921875, "kernel": 0.279296875}, "out_proj": {"bias": 0.0400390625, "kernel": 0.63671875}, "q_proj": {"bias": 0.016357421875, "kernel": 0.287109375}, "v_proj": {"bias": 0.06640625, "kernel": 0.6640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.053955078125, "kernel": 0.8046875}, "output_dense": {"bias": 0.041259765625, "kernel": 0.6796875}}, "final_layer_norm": {"bias": 0.091796875, "scale": 0.08349609375}, "layer_norm": {"bias": 0.1044921875, "scale": 0.0732421875}}, "28": {"attention": {"k_proj": {"bias": 0.00016498565673828125, "kernel": 0.228515625}, "out_proj": {"bias": 0.037109375, "kernel": 0.65625}, "q_proj": {"bias": 0.014404296875, "kernel": 0.25}, "v_proj": {"bias": 0.057861328125, "kernel": 0.640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.051025390625, "kernel": 0.8046875}, "output_dense": {"bias": 0.03857421875, "kernel": 0.6875}}, "final_layer_norm": {"bias": 0.0849609375, "scale": 0.0869140625}, "layer_norm": {"bias": 0.09423828125, "scale": 0.14453125}}, "29": {"attention": {"k_proj": {"bias": 0.000110626220703125, "kernel": 0.220703125}, "out_proj": {"bias": 0.034423828125, "kernel": 0.52734375}, "q_proj": {"bias": 0.01220703125, "kernel": 0.23828125}, "v_proj": {"bias": 0.0537109375, "kernel": 0.5703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.046875, "kernel": 0.84765625}, "output_dense": {"bias": 0.03271484375, "kernel": 0.66015625}}, "final_layer_norm": {"bias": 0.0712890625, "scale": 0.068359375}, "layer_norm": {"bias": 0.0927734375, "scale": 0.1044921875}}, "3": {"attention": {"k_proj": {"bias": 0.0001964569091796875, "kernel": 0.271484375}, "out_proj": {"bias": 0.0771484375, "kernel": 0.98046875}, "q_proj": {"bias": 0.02001953125, "kernel": 0.259765625}, "v_proj": {"bias": 0.1220703125, "kernel": 1.1015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.091796875, "kernel": 1.453125}, 
"output_dense": {"bias": 0.0712890625, "kernel": 1.03125}}, "final_layer_norm": {"bias": 0.16015625, "scale": 0.1376953125}, "layer_norm": {"bias": 0.171875, "scale": 0.2041015625}}, "30": {"attention": {"k_proj": {"bias": 0.0001239776611328125, "kernel": 0.265625}, "out_proj": {"bias": 0.0322265625, "kernel": 0.53515625}, "q_proj": {"bias": 0.013916015625, "kernel": 0.291015625}, "v_proj": {"bias": 0.04833984375, "kernel": 0.578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.04443359375, "kernel": 0.828125}, "output_dense": {"bias": 0.03125, "kernel": 0.57421875}}, "final_layer_norm": {"bias": 0.06689453125, "scale": 0.068359375}, "layer_norm": {"bias": 0.06884765625, "scale": 0.08154296875}}, "31": {"attention": {"k_proj": {"bias": 0.000133514404296875, "kernel": 0.2578125}, "out_proj": {"bias": 0.029541015625, "kernel": 0.52734375}, "q_proj": {"bias": 0.0140380859375, "kernel": 0.27734375}, "v_proj": {"bias": 0.04443359375, "kernel": 0.5625}}, "feed_forward": {"intermediate_dense": {"bias": 0.03955078125, "kernel": 0.734375}, "output_dense": {"bias": 0.0286865234375, "kernel": 0.5390625}}, "final_layer_norm": {"bias": 0.0615234375, "scale": 0.05810546875}, "layer_norm": {"bias": 0.0673828125, "scale": 0.09521484375}}, "32": {"attention": {"k_proj": {"bias": 0.00010204315185546875, "kernel": 0.2099609375}, "out_proj": {"bias": 0.0267333984375, "kernel": 0.4296875}, "q_proj": {"bias": 0.01165771484375, "kernel": 0.2333984375}, "v_proj": {"bias": 0.0380859375, "kernel": 0.46484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.037109375, "kernel": 0.7109375}, "output_dense": {"bias": 0.025146484375, "kernel": 0.4921875}}, "final_layer_norm": {"bias": 0.058349609375, "scale": 0.05517578125}, "layer_norm": {"bias": 0.05615234375, "scale": 0.0712890625}}, "33": {"attention": {"k_proj": {"bias": 0.00010395050048828125, "kernel": 0.23828125}, "out_proj": {"bias": 0.0240478515625, "kernel": 0.447265625}, "q_proj": {"bias": 0.01312255859375, "kernel": 
0.26953125}, "v_proj": {"bias": 0.03564453125, "kernel": 0.474609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.03369140625, "kernel": 0.640625}, "output_dense": {"bias": 0.02294921875, "kernel": 0.4765625}}, "final_layer_norm": {"bias": 0.055419921875, "scale": 0.0634765625}, "layer_norm": {"bias": 0.051025390625, "scale": 0.06396484375}}, "34": {"attention": {"k_proj": {"bias": 0.00010824203491210938, "kernel": 0.2021484375}, "out_proj": {"bias": 0.0198974609375, "kernel": 0.4375}, "q_proj": {"bias": 0.0103759765625, "kernel": 0.216796875}, "v_proj": {"bias": 0.0291748046875, "kernel": 0.4296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.02734375, "kernel": 0.5390625}, "output_dense": {"bias": 0.01904296875, "kernel": 0.451171875}}, "final_layer_norm": {"bias": 0.043701171875, "scale": 0.04541015625}, "layer_norm": {"bias": 0.044921875, "scale": 0.054443359375}}, "35": {"attention": {"k_proj": {"bias": 0.00012969970703125, "kernel": 0.1513671875}, "out_proj": {"bias": 0.017822265625, "kernel": 0.447265625}, "q_proj": {"bias": 0.007568359375, "kernel": 0.169921875}, "v_proj": {"bias": 0.0235595703125, "kernel": 0.38671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.024169921875, "kernel": 0.466796875}, "output_dense": {"bias": 0.01806640625, "kernel": 0.416015625}}, "final_layer_norm": {"bias": 0.0380859375, "scale": 0.037109375}, "layer_norm": {"bias": 0.037109375, "scale": 0.04150390625}}, "36": {"attention": {"k_proj": {"bias": 6.771087646484375e-05, "kernel": 0.12890625}, "out_proj": {"bias": 0.0174560546875, "kernel": 0.396484375}, "q_proj": {"bias": 0.0062255859375, "kernel": 0.134765625}, "v_proj": {"bias": 0.0225830078125, "kernel": 0.33984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0240478515625, "kernel": 0.455078125}, "output_dense": {"bias": 0.017822265625, "kernel": 0.3828125}}, "final_layer_norm": {"bias": 0.038330078125, "scale": 0.03466796875}, "layer_norm": {"bias": 0.035400390625, "scale": 
0.026123046875}}, "37": {"attention": {"k_proj": {"bias": 6.246566772460938e-05, "kernel": 0.125}, "out_proj": {"bias": 0.01708984375, "kernel": 0.4140625}, "q_proj": {"bias": 0.00634765625, "kernel": 0.13671875}, "v_proj": {"bias": 0.0238037109375, "kernel": 0.375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0234375, "kernel": 0.45703125}, "output_dense": {"bias": 0.0169677734375, "kernel": 0.376953125}}, "final_layer_norm": {"bias": 0.03759765625, "scale": 0.03564453125}, "layer_norm": {"bias": 0.041259765625, "scale": 0.033203125}}, "38": {"attention": {"k_proj": {"bias": 6.4849853515625e-05, "kernel": 0.12158203125}, "out_proj": {"bias": 0.015869140625, "kernel": 0.40234375}, "q_proj": {"bias": 0.0054931640625, "kernel": 0.123046875}, "v_proj": {"bias": 0.0224609375, "kernel": 0.3671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.023681640625, "kernel": 0.47265625}, "output_dense": {"bias": 0.015869140625, "kernel": 0.39453125}}, "final_layer_norm": {"bias": 0.03857421875, "scale": 0.040771484375}, "layer_norm": {"bias": 0.03759765625, "scale": 0.0303955078125}}, "39": {"attention": {"k_proj": {"bias": 6.341934204101562e-05, "kernel": 0.11865234375}, "out_proj": {"bias": 0.01416015625, "kernel": 0.39453125}, "q_proj": {"bias": 0.00567626953125, "kernel": 0.1328125}, "v_proj": {"bias": 0.0196533203125, "kernel": 0.333984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.02294921875, "kernel": 0.484375}, "output_dense": {"bias": 0.01409912109375, "kernel": 0.4765625}}, "final_layer_norm": {"bias": 0.03662109375, "scale": 0.033447265625}, "layer_norm": {"bias": 0.03369140625, "scale": 0.0255126953125}}, "4": {"attention": {"k_proj": {"bias": 0.00023174285888671875, "kernel": 0.298828125}, "out_proj": {"bias": 0.0732421875, "kernel": 1.109375}, "q_proj": {"bias": 0.02099609375, "kernel": 0.298828125}, "v_proj": {"bias": 0.11328125, "kernel": 1.203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0927734375, "kernel": 1.3828125}, 
"output_dense": {"bias": 0.072265625, "kernel": 1.015625}}, "final_layer_norm": {"bias": 0.1484375, "scale": 0.146484375}, "layer_norm": {"bias": 0.1572265625, "scale": 0.12255859375}}, "40": {"attention": {"k_proj": {"bias": 3.9577484130859375e-05, "kernel": 0.0888671875}, "out_proj": {"bias": 0.013671875, "kernel": 0.3671875}, "q_proj": {"bias": 0.003875732421875, "kernel": 0.09619140625}, "v_proj": {"bias": 0.0189208984375, "kernel": 0.33984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.02294921875, "kernel": 0.49609375}, "output_dense": {"bias": 0.013916015625, "kernel": 0.39453125}}, "final_layer_norm": {"bias": 0.038818359375, "scale": 0.0390625}, "layer_norm": {"bias": 0.030517578125, "scale": 0.025390625}}, "41": {"attention": {"k_proj": {"bias": 4.4345855712890625e-05, "kernel": 0.09912109375}, "out_proj": {"bias": 0.01226806640625, "kernel": 0.34765625}, "q_proj": {"bias": 0.00439453125, "kernel": 0.1103515625}, "v_proj": {"bias": 0.0189208984375, "kernel": 0.375}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.478515625}, "output_dense": {"bias": 0.0125732421875, "kernel": 0.40625}}, "final_layer_norm": {"bias": 0.03466796875, "scale": 0.035400390625}, "layer_norm": {"bias": 0.0311279296875, "scale": 0.037841796875}}, "42": {"attention": {"k_proj": {"bias": 3.0994415283203125e-05, "kernel": 0.06005859375}, "out_proj": {"bias": 0.0123291015625, "kernel": 0.30078125}, "q_proj": {"bias": 0.002777099609375, "kernel": 0.068359375}, "v_proj": {"bias": 0.01611328125, "kernel": 0.302734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0177001953125, "kernel": 0.44921875}, "output_dense": {"bias": 0.012939453125, "kernel": 0.337890625}}, "final_layer_norm": {"bias": 0.028076171875, "scale": 0.028076171875}, "layer_norm": {"bias": 0.02490234375, "scale": 0.02880859375}}, "43": {"attention": {"k_proj": {"bias": 1.811981201171875e-05, "kernel": 0.0400390625}, "out_proj": {"bias": 0.01336669921875, "kernel": 0.2578125}, 
"q_proj": {"bias": 0.00183868408203125, "kernel": 0.04248046875}, "v_proj": {"bias": 0.0159912109375, "kernel": 0.275390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.5234375}, "output_dense": {"bias": 0.014404296875, "kernel": 0.3515625}}, "final_layer_norm": {"bias": 0.032470703125, "scale": 0.0303955078125}, "layer_norm": {"bias": 0.029052734375, "scale": 0.029296875}}, "44": {"attention": {"k_proj": {"bias": 1.52587890625e-05, "kernel": 0.041015625}, "out_proj": {"bias": 0.0142822265625, "kernel": 0.28125}, "q_proj": {"bias": 0.00186920166015625, "kernel": 0.04296875}, "v_proj": {"bias": 0.017578125, "kernel": 0.310546875}}, "feed_forward": {"intermediate_dense": {"bias": 0.018310546875, "kernel": 0.515625}, "output_dense": {"bias": 0.0155029296875, "kernel": 0.3203125}}, "final_layer_norm": {"bias": 0.0284423828125, "scale": 0.025390625}, "layer_norm": {"bias": 0.03369140625, "scale": 0.0263671875}}, "45": {"attention": {"k_proj": {"bias": 1.5497207641601562e-05, "kernel": 0.03955078125}, "out_proj": {"bias": 0.0146484375, "kernel": 0.265625}, "q_proj": {"bias": 0.001953125, "kernel": 0.042236328125}, "v_proj": {"bias": 0.0185546875, "kernel": 0.3046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.015869140625, "kernel": 0.4375}, "output_dense": {"bias": 0.01556396484375, "kernel": 0.27734375}}, "final_layer_norm": {"bias": 0.025146484375, "scale": 0.0228271484375}, "layer_norm": {"bias": 0.0400390625, "scale": 0.0294189453125}}, "46": {"attention": {"k_proj": {"bias": 1.633167266845703e-05, "kernel": 0.03955078125}, "out_proj": {"bias": 0.0142822265625, "kernel": 0.2578125}, "q_proj": {"bias": 0.0018463134765625, "kernel": 0.03857421875}, "v_proj": {"bias": 0.0196533203125, "kernel": 0.3203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0125732421875, "kernel": 0.31640625}, "output_dense": {"bias": 0.0140380859375, "kernel": 0.244140625}}, "final_layer_norm": {"bias": 0.01904296875, "scale": 
0.0223388671875}, "layer_norm": {"bias": 0.051025390625, "scale": 0.036865234375}}, "47": {"attention": {"k_proj": {"bias": 1.4424324035644531e-05, "kernel": 0.04736328125}, "out_proj": {"bias": 0.013916015625, "kernel": 0.193359375}, "q_proj": {"bias": 0.0025177001953125, "kernel": 0.04248046875}, "v_proj": {"bias": 0.024169921875, "kernel": 0.3359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0103759765625, "kernel": 0.203125}, "output_dense": {"bias": 0.0125732421875, "kernel": 0.181640625}}, "final_layer_norm": {"bias": 0.020751953125, "scale": 0.0184326171875}, "layer_norm": {"bias": 0.06396484375, "scale": 0.044921875}}, "5": {"attention": {"k_proj": {"bias": 0.0001220703125, "kernel": 0.271484375}, "out_proj": {"bias": 0.07421875, "kernel": 0.77734375}, "q_proj": {"bias": 0.0185546875, "kernel": 0.283203125}, "v_proj": {"bias": 0.1162109375, "kernel": 0.953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.09375, "kernel": 1.3203125}, "output_dense": {"bias": 0.076171875, "kernel": 1.0}}, "final_layer_norm": {"bias": 0.158203125, "scale": 0.125}, "layer_norm": {"bias": 0.171875, "scale": 0.15234375}}, "6": {"attention": {"k_proj": {"bias": 0.000164031982421875, "kernel": 0.3203125}, "out_proj": {"bias": 0.068359375, "kernel": 0.9453125}, "q_proj": {"bias": 0.021240234375, "kernel": 0.3203125}, "v_proj": {"bias": 0.11865234375, "kernel": 1.125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0849609375, "kernel": 1.265625}, "output_dense": {"bias": 0.0703125, "kernel": 0.91796875}}, "final_layer_norm": {"bias": 0.1396484375, "scale": 0.1142578125}, "layer_norm": {"bias": 0.16796875, "scale": 0.15625}}, "7": {"attention": {"k_proj": {"bias": 0.00028228759765625, "kernel": 0.30078125}, "out_proj": {"bias": 0.06787109375, "kernel": 0.953125}, "q_proj": {"bias": 0.0201416015625, "kernel": 0.296875}, "v_proj": {"bias": 0.1064453125, "kernel": 1.0625}}, "feed_forward": {"intermediate_dense": {"bias": 0.08251953125, "kernel": 1.28125}, 
"output_dense": {"bias": 0.0673828125, "kernel": 0.921875}}, "final_layer_norm": {"bias": 0.1357421875, "scale": 0.11962890625}, "layer_norm": {"bias": 0.1689453125, "scale": 0.12890625}}, "8": {"attention": {"k_proj": {"bias": 0.0001583099365234375, "kernel": 0.2890625}, "out_proj": {"bias": 0.06396484375, "kernel": 0.84375}, "q_proj": {"bias": 0.0184326171875, "kernel": 0.283203125}, "v_proj": {"bias": 0.10546875, "kernel": 1.0}}, "feed_forward": {"intermediate_dense": {"bias": 0.08056640625, "kernel": 1.234375}, "output_dense": {"bias": 0.06201171875, "kernel": 0.8984375}}, "final_layer_norm": {"bias": 0.138671875, "scale": 0.126953125}, "layer_norm": {"bias": 0.162109375, "scale": 0.1796875}}, "9": {"attention": {"k_proj": {"bias": 0.000194549560546875, "kernel": 0.302734375}, "out_proj": {"bias": 0.054931640625, "kernel": 0.984375}, "q_proj": {"bias": 0.0174560546875, "kernel": 0.30078125}, "v_proj": {"bias": 0.08837890625, "kernel": 1.09375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0703125, "kernel": 1.125}, "output_dense": {"bias": 0.0556640625, "kernel": 0.8828125}}, "final_layer_norm": {"bias": 0.1162109375, "scale": 0.1240234375}, "layer_norm": {"bias": 0.140625, "scale": 0.095703125}}}, "pos_conv_embed": {"conv": {"bias": 0.130859375, "weight_g": 0.072265625, "weight_v": 0.91796875}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, 
"feature_projection": {"layer_norm": {"bias": 0.2490234375, "scale": 0.3671875}, "projection": {"bias": 0.1611328125, "kernel": 2.9375}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.006227790843695402, "kernel": 4.543642997741699}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.8080048561096191, "scale": 22.27030372619629}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.017717748880386353, "kernel": 25.907127380371094}, "out_proj": {"bias": 1.5421152114868164, "kernel": 25.076631546020508}, "q_proj": {"bias": 1.2994158267974854, "kernel": 26.18454360961914}, "v_proj": {"bias": 0.3453245759010315, "kernel": 25.80449676513672}}, "feed_forward": {"intermediate_dense": {"bias": 1.7608319520950317, "kernel": 95.11439514160156}, "output_dense": {"bias": 1.0213779211044312, "kernel": 90.89668273925781}}, "final_layer_norm": {"bias": 1.2822446823120117, "scale": 19.880083084106445}, "layer_norm": {"bias": 3.290161609649658, "scale": 16.032001495361328}}, "1": {"attention": {"k_proj": {"bias": 0.017420589923858643, "kernel": 40.236366271972656}, "out_proj": {"bias": 1.2911627292633057, "kernel": 41.64336395263672}, "q_proj": {"bias": 2.8566694259643555, "kernel": 40.07891082763672}, "v_proj": {"bias": 0.2825187146663666, "kernel": 40.126407623291016}}, "feed_forward": {"intermediate_dense": {"bias": 1.577695608139038, "kernel": 93.2037124633789}, "output_dense": {"bias": 0.8034582138061523, "kernel": 84.11729431152344}}, "final_layer_norm": {"bias": 1.1328635215759277, "scale": 18.407102584838867}, "layer_norm": {"bias": 1.73884916305542, "scale": 19.37253189086914}}, "10": {"attention": {"k_proj": {"bias": 0.03383120149374008, "kernel": 47.28413391113281}, "out_proj": {"bias": 1.2185262441635132, "kernel": 50.11650848388672}, "q_proj": {"bias": 2.4208953380584717, "kernel": 47.244537353515625}, "v_proj": {"bias": 0.31392401456832886, "kernel": 50.314903259277344}}, "feed_forward": {"intermediate_dense": {"bias": 1.622002124786377, 
"kernel": 97.558349609375}, "output_dense": {"bias": 0.5634207725524902, "kernel": 91.49876403808594}}, "final_layer_norm": {"bias": 2.1998660564422607, "scale": 20.355709075927734}, "layer_norm": {"bias": 1.6904888153076172, "scale": 22.30306625366211}}, "11": {"attention": {"k_proj": {"bias": 0.09422115236520767, "kernel": 47.070281982421875}, "out_proj": {"bias": 1.0706963539123535, "kernel": 49.301727294921875}, "q_proj": {"bias": 2.470736026763916, "kernel": 46.793678283691406}, "v_proj": {"bias": 0.3550601005554199, "kernel": 49.84492111206055}}, "feed_forward": {"intermediate_dense": {"bias": 1.67034113407135, "kernel": 98.30744934082031}, "output_dense": {"bias": 0.5463298559188843, "kernel": 93.21025848388672}}, "final_layer_norm": {"bias": 2.18017578125, "scale": 20.362842559814453}, "layer_norm": {"bias": 1.6739838123321533, "scale": 22.603429794311523}}, "12": {"attention": {"k_proj": {"bias": 0.037733033299446106, "kernel": 47.66437530517578}, "out_proj": {"bias": 1.055997610092163, "kernel": 49.61451721191406}, "q_proj": {"bias": 2.3581721782684326, "kernel": 47.41926956176758}, "v_proj": {"bias": 0.34188324213027954, "kernel": 50.03089904785156}}, "feed_forward": {"intermediate_dense": {"bias": 1.713924765586853, "kernel": 99.14901733398438}, "output_dense": {"bias": 0.5359264612197876, "kernel": 94.75711059570312}}, "final_layer_norm": {"bias": 2.1364316940307617, "scale": 20.325605392456055}, "layer_norm": {"bias": 1.7382376194000244, "scale": 23.154159545898438}}, "13": {"attention": {"k_proj": {"bias": 0.06478377431631088, "kernel": 49.548011779785156}, "out_proj": {"bias": 1.049849510192871, "kernel": 49.253868103027344}, "q_proj": {"bias": 2.3329415321350098, "kernel": 49.404293060302734}, "v_proj": {"bias": 0.3697168529033661, "kernel": 49.43449783325195}}, "feed_forward": {"intermediate_dense": {"bias": 1.7687466144561768, "kernel": 99.75472259521484}, "output_dense": {"bias": 0.5525321960449219, "kernel": 95.19114685058594}}, 
"final_layer_norm": {"bias": 2.020570993423462, "scale": 20.470088958740234}, "layer_norm": {"bias": 1.837044358253479, "scale": 23.37548065185547}}, "14": {"attention": {"k_proj": {"bias": 0.15069520473480225, "kernel": 49.753196716308594}, "out_proj": {"bias": 1.2117998600006104, "kernel": 47.694759368896484}, "q_proj": {"bias": 2.397552490234375, "kernel": 49.812904357910156}, "v_proj": {"bias": 0.37084758281707764, "kernel": 47.27647399902344}}, "feed_forward": {"intermediate_dense": {"bias": 1.8047257661819458, "kernel": 100.37411499023438}, "output_dense": {"bias": 0.5676146745681763, "kernel": 96.55570983886719}}, "final_layer_norm": {"bias": 2.1551315784454346, "scale": 20.614219665527344}, "layer_norm": {"bias": 1.9676849842071533, "scale": 23.550649642944336}}, "15": {"attention": {"k_proj": {"bias": 0.07710058987140656, "kernel": 49.88433074951172}, "out_proj": {"bias": 1.261141061782837, "kernel": 48.29498291015625}, "q_proj": {"bias": 2.5427799224853516, "kernel": 49.952571868896484}, "v_proj": {"bias": 0.40316635370254517, "kernel": 47.94293975830078}}, "feed_forward": {"intermediate_dense": {"bias": 1.8133320808410645, "kernel": 100.18331909179688}, "output_dense": {"bias": 0.7154802083969116, "kernel": 97.23637390136719}}, "final_layer_norm": {"bias": 2.079444169998169, "scale": 20.706073760986328}, "layer_norm": {"bias": 2.2165818214416504, "scale": 23.696819305419922}}, "16": {"attention": {"k_proj": {"bias": 0.03493429720401764, "kernel": 49.79267120361328}, "out_proj": {"bias": 1.1955194473266602, "kernel": 47.76436996459961}, "q_proj": {"bias": 2.6263279914855957, "kernel": 49.67594909667969}, "v_proj": {"bias": 0.358820378780365, "kernel": 47.44581604003906}}, "feed_forward": {"intermediate_dense": {"bias": 1.8111960887908936, "kernel": 100.83795166015625}, "output_dense": {"bias": 0.7391186356544495, "kernel": 98.10263061523438}}, "final_layer_norm": {"bias": 2.1532938480377197, "scale": 21.192392349243164}, "layer_norm": {"bias": 
2.1495795249938965, "scale": 22.608299255371094}}, "17": {"attention": {"k_proj": {"bias": 0.02427005022764206, "kernel": 49.99943542480469}, "out_proj": {"bias": 1.1385328769683838, "kernel": 47.08169937133789}, "q_proj": {"bias": 2.698282241821289, "kernel": 50.10108184814453}, "v_proj": {"bias": 0.39649325609207153, "kernel": 46.75300598144531}}, "feed_forward": {"intermediate_dense": {"bias": 1.8220467567443848, "kernel": 101.9210205078125}, "output_dense": {"bias": 0.7551708817481995, "kernel": 98.4967269897461}}, "final_layer_norm": {"bias": 2.2398810386657715, "scale": 21.752655029296875}, "layer_norm": {"bias": 2.0628278255462646, "scale": 22.166473388671875}}, "18": {"attention": {"k_proj": {"bias": 0.06514191627502441, "kernel": 50.276710510253906}, "out_proj": {"bias": 1.2418107986450195, "kernel": 48.10392379760742}, "q_proj": {"bias": 2.5922303199768066, "kernel": 50.66260528564453}, "v_proj": {"bias": 0.42488259077072144, "kernel": 47.63117218017578}}, "feed_forward": {"intermediate_dense": {"bias": 1.864889144897461, "kernel": 102.2223892211914}, "output_dense": {"bias": 0.8703033924102783, "kernel": 100.13847351074219}}, "final_layer_norm": {"bias": 2.3449723720550537, "scale": 21.713321685791016}, "layer_norm": {"bias": 2.2429957389831543, "scale": 23.88089370727539}}, "19": {"attention": {"k_proj": {"bias": 0.02069919742643833, "kernel": 49.551841735839844}, "out_proj": {"bias": 1.2166869640350342, "kernel": 47.99229431152344}, "q_proj": {"bias": 2.8673105239868164, "kernel": 49.98308563232422}, "v_proj": {"bias": 0.3887897729873657, "kernel": 47.23723602294922}}, "feed_forward": {"intermediate_dense": {"bias": 1.9199435710906982, "kernel": 102.80052947998047}, "output_dense": {"bias": 0.9345583915710449, "kernel": 101.04116821289062}}, "final_layer_norm": {"bias": 2.3030409812927246, "scale": 22.07358169555664}, "layer_norm": {"bias": 2.1653337478637695, "scale": 23.089134216308594}}, "2": {"attention": {"k_proj": {"bias": 0.03994838893413544, 
"kernel": 46.15595245361328}, "out_proj": {"bias": 1.2123382091522217, "kernel": 43.85425567626953}, "q_proj": {"bias": 3.044398069381714, "kernel": 45.923065185546875}, "v_proj": {"bias": 0.3091737627983093, "kernel": 43.853729248046875}}, "feed_forward": {"intermediate_dense": {"bias": 1.6172540187835693, "kernel": 98.27151489257812}, "output_dense": {"bias": 0.6916781067848206, "kernel": 87.2510986328125}}, "final_layer_norm": {"bias": 1.4531984329223633, "scale": 20.986406326293945}, "layer_norm": {"bias": 1.66593337059021, "scale": 22.054546356201172}}, "20": {"attention": {"k_proj": {"bias": 0.019117258489131927, "kernel": 49.522117614746094}, "out_proj": {"bias": 1.245902419090271, "kernel": 47.3779296875}, "q_proj": {"bias": 2.7806339263916016, "kernel": 50.30908203125}, "v_proj": {"bias": 0.36149418354034424, "kernel": 46.277198791503906}}, "feed_forward": {"intermediate_dense": {"bias": 1.9210526943206787, "kernel": 104.06356811523438}, "output_dense": {"bias": 1.0503356456756592, "kernel": 101.6682357788086}}, "final_layer_norm": {"bias": 2.3304975032806396, "scale": 23.017230987548828}, "layer_norm": {"bias": 2.1417791843414307, "scale": 23.232433319091797}}, "21": {"attention": {"k_proj": {"bias": 0.039454348385334015, "kernel": 49.967464447021484}, "out_proj": {"bias": 1.2816205024719238, "kernel": 47.4180908203125}, "q_proj": {"bias": 2.7252862453460693, "kernel": 50.81111145019531}, "v_proj": {"bias": 0.41635215282440186, "kernel": 46.52488708496094}}, "feed_forward": {"intermediate_dense": {"bias": 1.9635816812515259, "kernel": 104.25914764404297}, "output_dense": {"bias": 1.1212176084518433, "kernel": 101.99238586425781}}, "final_layer_norm": {"bias": 2.3592710494995117, "scale": 22.663631439208984}, "layer_norm": {"bias": 2.2133560180664062, "scale": 23.51139259338379}}, "22": {"attention": {"k_proj": {"bias": 0.023747840896248817, "kernel": 50.355613708496094}, "out_proj": {"bias": 1.2000508308410645, "kernel": 46.872554779052734}, "q_proj": 
{"bias": 2.8076577186584473, "kernel": 50.73884201049805}, "v_proj": {"bias": 0.3693321943283081, "kernel": 46.74058532714844}}, "feed_forward": {"intermediate_dense": {"bias": 1.8949806690216064, "kernel": 104.65501403808594}, "output_dense": {"bias": 1.1306650638580322, "kernel": 101.26948547363281}}, "final_layer_norm": {"bias": 2.244089126586914, "scale": 22.18447494506836}, "layer_norm": {"bias": 2.2082674503326416, "scale": 22.5196475982666}}, "23": {"attention": {"k_proj": {"bias": 0.12220916152000427, "kernel": 51.456703186035156}, "out_proj": {"bias": 1.3277795314788818, "kernel": 47.87244415283203}, "q_proj": {"bias": 2.6426448822021484, "kernel": 51.569129943847656}, "v_proj": {"bias": 0.5203225612640381, "kernel": 48.51310348510742}}, "feed_forward": {"intermediate_dense": {"bias": 1.872227430343628, "kernel": 104.44924926757812}, "output_dense": {"bias": 1.108591079711914, "kernel": 102.06685638427734}}, "final_layer_norm": {"bias": 2.4933314323425293, "scale": 22.13880157470703}, "layer_norm": {"bias": 2.6961421966552734, "scale": 23.728824615478516}}, "24": {"attention": {"k_proj": {"bias": 0.058006178587675095, "kernel": 49.9398193359375}, "out_proj": {"bias": 1.3834214210510254, "kernel": 49.853782653808594}, "q_proj": {"bias": 2.8019046783447266, "kernel": 49.93363952636719}, "v_proj": {"bias": 0.4747922122478485, "kernel": 49.930416107177734}}, "feed_forward": {"intermediate_dense": {"bias": 1.9901800155639648, "kernel": 103.92146301269531}, "output_dense": {"bias": 1.1459870338439941, "kernel": 104.94160461425781}}, "final_layer_norm": {"bias": 2.5980639457702637, "scale": 22.196256637573242}, "layer_norm": {"bias": 2.4199328422546387, "scale": 23.271270751953125}}, "25": {"attention": {"k_proj": {"bias": 0.0486767403781414, "kernel": 50.49137878417969}, "out_proj": {"bias": 1.1953943967819214, "kernel": 47.765968322753906}, "q_proj": {"bias": 2.877917766571045, "kernel": 50.27877426147461}, "v_proj": {"bias": 0.5565428137779236, "kernel": 
48.302940368652344}}, "feed_forward": {"intermediate_dense": {"bias": 1.8897809982299805, "kernel": 104.18898010253906}, "output_dense": {"bias": 1.0260541439056396, "kernel": 104.87144470214844}}, "final_layer_norm": {"bias": 2.3020076751708984, "scale": 22.731998443603516}, "layer_norm": {"bias": 2.5791854858398438, "scale": 22.420787811279297}}, "26": {"attention": {"k_proj": {"bias": 0.07401315867900848, "kernel": 50.69556427001953}, "out_proj": {"bias": 1.132948637008667, "kernel": 48.53791046142578}, "q_proj": {"bias": 2.8362269401550293, "kernel": 50.46051788330078}, "v_proj": {"bias": 0.4930846691131592, "kernel": 49.14529800415039}}, "feed_forward": {"intermediate_dense": {"bias": 1.9828646183013916, "kernel": 103.61874389648438}, "output_dense": {"bias": 0.9868142604827881, "kernel": 102.04234313964844}}, "final_layer_norm": {"bias": 1.9354043006896973, "scale": 21.58776092529297}, "layer_norm": {"bias": 2.483001232147217, "scale": 22.860599517822266}}, "27": {"attention": {"k_proj": {"bias": 0.37386101484298706, "kernel": 51.353492736816406}, "out_proj": {"bias": 1.360071063041687, "kernel": 49.861541748046875}, "q_proj": {"bias": 2.6180477142333984, "kernel": 51.21508026123047}, "v_proj": {"bias": 0.5687844753265381, "kernel": 50.31639099121094}}, "feed_forward": {"intermediate_dense": {"bias": 2.1435513496398926, "kernel": 101.89665985107422}, "output_dense": {"bias": 0.8681224584579468, "kernel": 101.72557067871094}}, "final_layer_norm": {"bias": 2.216545581817627, "scale": 20.85482406616211}, "layer_norm": {"bias": 2.5525119304656982, "scale": 23.54396629333496}}, "28": {"attention": {"k_proj": {"bias": 0.41085001826286316, "kernel": 52.282447814941406}, "out_proj": {"bias": 1.3871128559112549, "kernel": 50.62632751464844}, "q_proj": {"bias": 2.7654333114624023, "kernel": 51.92694854736328}, "v_proj": {"bias": 0.46052616834640503, "kernel": 50.95448684692383}}, "feed_forward": {"intermediate_dense": {"bias": 2.0904147624969482, "kernel": 
101.90587615966797}, "output_dense": {"bias": 0.770317554473877, "kernel": 103.91770935058594}}, "final_layer_norm": {"bias": 2.126486301422119, "scale": 21.1737060546875}, "layer_norm": {"bias": 2.0537118911743164, "scale": 24.406505584716797}}, "29": {"attention": {"k_proj": {"bias": 0.07059745490550995, "kernel": 48.74183654785156}, "out_proj": {"bias": 1.3648455142974854, "kernel": 53.14055633544922}, "q_proj": {"bias": 2.740316867828369, "kernel": 48.56298828125}, "v_proj": {"bias": 0.4175933599472046, "kernel": 53.0452880859375}}, "feed_forward": {"intermediate_dense": {"bias": 2.0906143188476562, "kernel": 102.57981872558594}, "output_dense": {"bias": 0.8713856935501099, "kernel": 108.18291473388672}}, "final_layer_norm": {"bias": 2.367311477661133, "scale": 22.308059692382812}, "layer_norm": {"bias": 2.15020489692688, "scale": 25.381393432617188}}, "3": {"attention": {"k_proj": {"bias": 0.12173552811145782, "kernel": 50.12483215332031}, "out_proj": {"bias": 1.3624351024627686, "kernel": 46.49520492553711}, "q_proj": {"bias": 2.719555616378784, "kernel": 50.35211181640625}, "v_proj": {"bias": 0.2993015646934509, "kernel": 46.89512634277344}}, "feed_forward": {"intermediate_dense": {"bias": 1.6325161457061768, "kernel": 99.91313171386719}, "output_dense": {"bias": 0.6524742841720581, "kernel": 90.10629272460938}}, "final_layer_norm": {"bias": 1.7124708890914917, "scale": 21.081825256347656}, "layer_norm": {"bias": 1.8278560638427734, "scale": 23.59053611755371}}, "30": {"attention": {"k_proj": {"bias": 0.2557613253593445, "kernel": 50.66333770751953}, "out_proj": {"bias": 1.1596150398254395, "kernel": 49.418575286865234}, "q_proj": {"bias": 2.8001761436462402, "kernel": 50.7453498840332}, "v_proj": {"bias": 0.4823254644870758, "kernel": 49.760719299316406}}, "feed_forward": {"intermediate_dense": {"bias": 2.026075839996338, "kernel": 103.08485412597656}, "output_dense": {"bias": 0.8237862586975098, "kernel": 107.17366790771484}}, "final_layer_norm": {"bias": 
2.191945791244507, "scale": 23.445449829101562}, "layer_norm": {"bias": 2.3006272315979004, "scale": 25.11294174194336}}, "31": {"attention": {"k_proj": {"bias": 0.3532944321632385, "kernel": 49.19044494628906}, "out_proj": {"bias": 1.0852205753326416, "kernel": 50.28578186035156}, "q_proj": {"bias": 2.582430362701416, "kernel": 49.29231262207031}, "v_proj": {"bias": 0.527869462966919, "kernel": 50.41516876220703}}, "feed_forward": {"intermediate_dense": {"bias": 2.1042604446411133, "kernel": 101.77178192138672}, "output_dense": {"bias": 1.001185417175293, "kernel": 104.5643310546875}}, "final_layer_norm": {"bias": 2.0816102027893066, "scale": 23.34256362915039}, "layer_norm": {"bias": 2.2965025901794434, "scale": 24.890090942382812}}, "32": {"attention": {"k_proj": {"bias": 0.2081139087677002, "kernel": 48.02776336669922}, "out_proj": {"bias": 1.0938222408294678, "kernel": 49.4695930480957}, "q_proj": {"bias": 2.8447506427764893, "kernel": 48.01158142089844}, "v_proj": {"bias": 0.3958088755607605, "kernel": 49.75651168823242}}, "feed_forward": {"intermediate_dense": {"bias": 2.0343356132507324, "kernel": 100.63871765136719}, "output_dense": {"bias": 1.0618953704833984, "kernel": 103.91098022460938}}, "final_layer_norm": {"bias": 2.0418777465820312, "scale": 23.778400421142578}, "layer_norm": {"bias": 2.2472598552703857, "scale": 25.151775360107422}}, "33": {"attention": {"k_proj": {"bias": 0.21029098331928253, "kernel": 47.97268295288086}, "out_proj": {"bias": 1.13039231300354, "kernel": 49.31745147705078}, "q_proj": {"bias": 2.9875826835632324, "kernel": 47.9653434753418}, "v_proj": {"bias": 0.42682555317878723, "kernel": 49.583251953125}}, "feed_forward": {"intermediate_dense": {"bias": 2.0421395301818848, "kernel": 99.01563262939453}, "output_dense": {"bias": 1.0347816944122314, "kernel": 102.68461608886719}}, "final_layer_norm": {"bias": 1.9543565511703491, "scale": 23.545602798461914}, "layer_norm": {"bias": 2.4407973289489746, "scale": 25.397212982177734}}, 
"34": {"attention": {"k_proj": {"bias": 0.22819873690605164, "kernel": 47.184783935546875}, "out_proj": {"bias": 1.3782916069030762, "kernel": 50.803382873535156}, "q_proj": {"bias": 2.8665852546691895, "kernel": 47.237979888916016}, "v_proj": {"bias": 0.3953763246536255, "kernel": 50.73908233642578}}, "feed_forward": {"intermediate_dense": {"bias": 2.1229429244995117, "kernel": 97.85118103027344}, "output_dense": {"bias": 0.9661370515823364, "kernel": 102.001708984375}}, "final_layer_norm": {"bias": 1.8967653512954712, "scale": 23.200794219970703}, "layer_norm": {"bias": 2.5211293697357178, "scale": 25.781917572021484}}, "35": {"attention": {"k_proj": {"bias": 0.35871943831443787, "kernel": 48.906837463378906}, "out_proj": {"bias": 1.2972804307937622, "kernel": 49.65650939941406}, "q_proj": {"bias": 2.615225076675415, "kernel": 49.24107360839844}, "v_proj": {"bias": 0.4798451066017151, "kernel": 49.480987548828125}}, "feed_forward": {"intermediate_dense": {"bias": 2.20389986038208, "kernel": 96.45945739746094}, "output_dense": {"bias": 0.8606913089752197, "kernel": 100.74075317382812}}, "final_layer_norm": {"bias": 1.9770451784133911, "scale": 23.324676513671875}, "layer_norm": {"bias": 2.2845458984375, "scale": 26.272663116455078}}, "36": {"attention": {"k_proj": {"bias": 0.19124102592468262, "kernel": 46.22441864013672}, "out_proj": {"bias": 1.3377137184143066, "kernel": 50.99781799316406}, "q_proj": {"bias": 2.70013689994812, "kernel": 46.213951110839844}, "v_proj": {"bias": 0.36395663022994995, "kernel": 51.181884765625}}, "feed_forward": {"intermediate_dense": {"bias": 2.07747220993042, "kernel": 95.55824279785156}, "output_dense": {"bias": 0.8954221606254578, "kernel": 100.43612670898438}}, "final_layer_norm": {"bias": 1.6182420253753662, "scale": 23.849767684936523}, "layer_norm": {"bias": 2.0086894035339355, "scale": 25.781164169311523}}, "37": {"attention": {"k_proj": {"bias": 0.5271086692810059, "kernel": 45.2613639831543}, "out_proj": {"bias": 
1.598453402519226, "kernel": 50.981468200683594}, "q_proj": {"bias": 2.3939218521118164, "kernel": 45.332435607910156}, "v_proj": {"bias": 0.3595349192619324, "kernel": 50.851402282714844}}, "feed_forward": {"intermediate_dense": {"bias": 1.972982406616211, "kernel": 94.81402587890625}, "output_dense": {"bias": 0.9043532013893127, "kernel": 100.2063217163086}}, "final_layer_norm": {"bias": 1.446416974067688, "scale": 24.250770568847656}, "layer_norm": {"bias": 1.9783923625946045, "scale": 25.818511962890625}}, "38": {"attention": {"k_proj": {"bias": 0.6130545139312744, "kernel": 43.451454162597656}, "out_proj": {"bias": 1.298864483833313, "kernel": 50.46533203125}, "q_proj": {"bias": 2.3286657333374023, "kernel": 43.462501525878906}, "v_proj": {"bias": 0.4180901050567627, "kernel": 50.337013244628906}}, "feed_forward": {"intermediate_dense": {"bias": 1.918135404586792, "kernel": 92.86172485351562}, "output_dense": {"bias": 0.892139196395874, "kernel": 98.45660400390625}}, "final_layer_norm": {"bias": 1.4936511516571045, "scale": 24.967491149902344}, "layer_norm": {"bias": 2.156099557876587, "scale": 26.533777236938477}}, "39": {"attention": {"k_proj": {"bias": 0.6435011625289917, "kernel": 43.22444152832031}, "out_proj": {"bias": 1.5929176807403564, "kernel": 50.33856201171875}, "q_proj": {"bias": 2.1112735271453857, "kernel": 43.61250686645508}, "v_proj": {"bias": 0.38804692029953003, "kernel": 50.01123046875}}, "feed_forward": {"intermediate_dense": {"bias": 1.9113759994506836, "kernel": 91.18324279785156}, "output_dense": {"bias": 0.971623420715332, "kernel": 98.84017944335938}}, "final_layer_norm": {"bias": 1.6386053562164307, "scale": 25.60051918029785}, "layer_norm": {"bias": 2.134451389312744, "scale": 27.175662994384766}}, "4": {"attention": {"k_proj": {"bias": 0.13544148206710815, "kernel": 52.686279296875}, "out_proj": {"bias": 1.5433743000030518, "kernel": 47.89597702026367}, "q_proj": {"bias": 2.520129680633545, "kernel": 52.86738967895508}, "v_proj": 
{"bias": 0.34588170051574707, "kernel": 48.25390625}}, "feed_forward": {"intermediate_dense": {"bias": 1.621065378189087, "kernel": 99.4951171875}, "output_dense": {"bias": 0.8157577514648438, "kernel": 91.33027648925781}}, "final_layer_norm": {"bias": 1.797032356262207, "scale": 20.613189697265625}, "layer_norm": {"bias": 1.921250343322754, "scale": 23.96576690673828}}, "40": {"attention": {"k_proj": {"bias": 0.5847006440162659, "kernel": 42.58441162109375}, "out_proj": {"bias": 1.53633451461792, "kernel": 48.99122619628906}, "q_proj": {"bias": 2.0470333099365234, "kernel": 43.350059509277344}, "v_proj": {"bias": 0.44075465202331543, "kernel": 48.568878173828125}}, "feed_forward": {"intermediate_dense": {"bias": 1.7721619606018066, "kernel": 89.45103454589844}, "output_dense": {"bias": 1.0236999988555908, "kernel": 96.0992660522461}}, "final_layer_norm": {"bias": 1.799966812133789, "scale": 24.871023178100586}, "layer_norm": {"bias": 2.0770342350006104, "scale": 26.721298217773438}}, "41": {"attention": {"k_proj": {"bias": 1.6700019836425781, "kernel": 39.923744201660156}, "out_proj": {"bias": 1.298099398612976, "kernel": 50.55332946777344}, "q_proj": {"bias": 1.7261749505996704, "kernel": 40.68635559082031}, "v_proj": {"bias": 0.3966291546821594, "kernel": 49.504920959472656}}, "feed_forward": {"intermediate_dense": {"bias": 1.9124207496643066, "kernel": 86.245361328125}, "output_dense": {"bias": 1.0469214916229248, "kernel": 95.14845275878906}}, "final_layer_norm": {"bias": 2.298802614212036, "scale": 28.323535919189453}, "layer_norm": {"bias": 2.107060432434082, "scale": 28.51062774658203}}, "42": {"attention": {"k_proj": {"bias": 0.7963449954986572, "kernel": 36.71210479736328}, "out_proj": {"bias": 1.338565707206726, "kernel": 44.78729248046875}, "q_proj": {"bias": 1.5457658767700195, "kernel": 38.06196594238281}, "v_proj": {"bias": 0.5876978635787964, "kernel": 43.13445281982422}}, "feed_forward": {"intermediate_dense": {"bias": 1.6503515243530273, "kernel": 
85.23811340332031}, "output_dense": {"bias": 1.0994179248809814, "kernel": 93.35334777832031}}, "final_layer_norm": {"bias": 2.021554470062256, "scale": 29.623184204101562}, "layer_norm": {"bias": 1.5729947090148926, "scale": 27.38263702392578}}, "43": {"attention": {"k_proj": {"bias": 1.2092329263687134, "kernel": 33.23188018798828}, "out_proj": {"bias": 1.3312959671020508, "kernel": 41.18092346191406}, "q_proj": {"bias": 1.356087327003479, "kernel": 34.04554748535156}, "v_proj": {"bias": 0.5175312757492065, "kernel": 39.07632064819336}}, "feed_forward": {"intermediate_dense": {"bias": 1.6842585802078247, "kernel": 84.46446228027344}, "output_dense": {"bias": 0.8656268119812012, "kernel": 91.28285217285156}}, "final_layer_norm": {"bias": 1.9466145038604736, "scale": 31.840717315673828}, "layer_norm": {"bias": 1.6922515630722046, "scale": 25.534618377685547}}, "44": {"attention": {"k_proj": {"bias": 2.490459442138672, "kernel": 33.82369613647461}, "out_proj": {"bias": 1.0941083431243896, "kernel": 44.90596008300781}, "q_proj": {"bias": 1.2875206470489502, "kernel": 34.19591522216797}, "v_proj": {"bias": 0.3790741264820099, "kernel": 43.999244689941406}}, "feed_forward": {"intermediate_dense": {"bias": 1.7634968757629395, "kernel": 83.41915893554688}, "output_dense": {"bias": 0.8121882081031799, "kernel": 88.93473052978516}}, "final_layer_norm": {"bias": 1.9330906867980957, "scale": 34.01293182373047}, "layer_norm": {"bias": 1.5868189334869385, "scale": 25.552824020385742}}, "45": {"attention": {"k_proj": {"bias": 2.0480833053588867, "kernel": 33.66320037841797}, "out_proj": {"bias": 0.9800894260406494, "kernel": 48.50392532348633}, "q_proj": {"bias": 1.3665473461151123, "kernel": 33.8492431640625}, "v_proj": {"bias": 0.4303898215293884, "kernel": 48.66197204589844}}, "feed_forward": {"intermediate_dense": {"bias": 1.881667137145996, "kernel": 80.08865356445312}, "output_dense": {"bias": 0.94748854637146, "kernel": 84.32666778564453}}, "final_layer_norm": {"bias": 
1.6785303354263306, "scale": 32.72064971923828}, "layer_norm": {"bias": 1.5169761180877686, "scale": 24.069011688232422}}, "46": {"attention": {"k_proj": {"bias": 1.5384384393692017, "kernel": 34.843414306640625}, "out_proj": {"bias": 0.7449491024017334, "kernel": 50.93366241455078}, "q_proj": {"bias": 1.5333590507507324, "kernel": 34.964630126953125}, "v_proj": {"bias": 0.37132495641708374, "kernel": 51.68553161621094}}, "feed_forward": {"intermediate_dense": {"bias": 1.941842794418335, "kernel": 74.42733764648438}, "output_dense": {"bias": 1.1018041372299194, "kernel": 74.62886047363281}}, "final_layer_norm": {"bias": 1.6753082275390625, "scale": 28.232973098754883}, "layer_norm": {"bias": 1.3341909646987915, "scale": 22.984222412109375}}, "47": {"attention": {"k_proj": {"bias": 0.2589734196662903, "kernel": 37.107086181640625}, "out_proj": {"bias": 0.6299062967300415, "kernel": 45.20429992675781}, "q_proj": {"bias": 1.651952862739563, "kernel": 37.7532958984375}, "v_proj": {"bias": 0.3462907671928406, "kernel": 46.18851852416992}}, "feed_forward": {"intermediate_dense": {"bias": 1.9934666156768799, "kernel": 71.76575469970703}, "output_dense": {"bias": 0.605868935585022, "kernel": 68.13175201416016}}, "final_layer_norm": {"bias": 1.5152955055236816, "scale": 23.07292366027832}, "layer_norm": {"bias": 1.0596134662628174, "scale": 20.234088897705078}}, "5": {"attention": {"k_proj": {"bias": 0.018954617902636528, "kernel": 48.02461242675781}, "out_proj": {"bias": 1.5274896621704102, "kernel": 49.122745513916016}, "q_proj": {"bias": 2.616206645965576, "kernel": 48.16704177856445}, "v_proj": {"bias": 0.30917540192604065, "kernel": 49.918846130371094}}, "feed_forward": {"intermediate_dense": {"bias": 1.5450689792633057, "kernel": 99.6058120727539}, "output_dense": {"bias": 0.845067024230957, "kernel": 90.62794494628906}}, "final_layer_norm": {"bias": 2.0742123126983643, "scale": 20.826757431030273}, "layer_norm": {"bias": 1.9509385824203491, "scale": 
23.38003921508789}}, "6": {"attention": {"k_proj": {"bias": 0.20099012553691864, "kernel": 49.64418411254883}, "out_proj": {"bias": 1.5177661180496216, "kernel": 48.44267272949219}, "q_proj": {"bias": 2.6649043560028076, "kernel": 50.12384033203125}, "v_proj": {"bias": 0.3115385174751282, "kernel": 48.973114013671875}}, "feed_forward": {"intermediate_dense": {"bias": 1.5237888097763062, "kernel": 98.6987075805664}, "output_dense": {"bias": 0.6965006589889526, "kernel": 90.21868133544922}}, "final_layer_norm": {"bias": 2.374725818634033, "scale": 20.303024291992188}, "layer_norm": {"bias": 1.9539954662322998, "scale": 23.747432708740234}}, "7": {"attention": {"k_proj": {"bias": 0.19492888450622559, "kernel": 49.445472717285156}, "out_proj": {"bias": 1.3328473567962646, "kernel": 48.69258117675781}, "q_proj": {"bias": 2.440291404724121, "kernel": 49.83523941040039}, "v_proj": {"bias": 0.39602091908454895, "kernel": 48.65628433227539}}, "feed_forward": {"intermediate_dense": {"bias": 1.5299232006072998, "kernel": 98.44473266601562}, "output_dense": {"bias": 0.5384174585342407, "kernel": 89.95935821533203}}, "final_layer_norm": {"bias": 2.2129130363464355, "scale": 20.541099548339844}, "layer_norm": {"bias": 1.8585599660873413, "scale": 22.472408294677734}}, "8": {"attention": {"k_proj": {"bias": 0.17308779060840607, "kernel": 48.946983337402344}, "out_proj": {"bias": 1.1581041812896729, "kernel": 49.24256896972656}, "q_proj": {"bias": 2.4154317378997803, "kernel": 48.71516418457031}, "v_proj": {"bias": 0.32466843724250793, "kernel": 49.42582702636719}}, "feed_forward": {"intermediate_dense": {"bias": 1.5838823318481445, "kernel": 98.05018615722656}, "output_dense": {"bias": 0.49450039863586426, "kernel": 89.38335418701172}}, "final_layer_norm": {"bias": 2.1680216789245605, "scale": 20.328733444213867}, "layer_norm": {"bias": 1.7943463325500488, "scale": 22.934885025024414}}, "9": {"attention": {"k_proj": {"bias": 0.20712265372276306, "kernel": 49.56227111816406}, 
"out_proj": {"bias": 1.3597404956817627, "kernel": 50.03252410888672}, "q_proj": {"bias": 2.3744239807128906, "kernel": 49.721893310546875}, "v_proj": {"bias": 0.33329081535339355, "kernel": 50.44175338745117}}, "feed_forward": {"intermediate_dense": {"bias": 1.665709376335144, "kernel": 96.65989685058594}, "output_dense": {"bias": 0.6352252960205078, "kernel": 89.92665100097656}}, "final_layer_norm": {"bias": 2.058505058288574, "scale": 19.611835479736328}, "layer_norm": {"bias": 1.8844467401504517, "scale": 24.294519424438477}}}, "pos_conv_embed": {"conv": {"bias": 5.548188209533691, "weight_g": 8.812222480773926, "weight_v": 84.63363647460938}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.323210716247559, "scale": 16.550113677978516}, "projection": {"bias": 1.6564881801605225, "kernel": 34.68690490722656}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 2.497500099707395e-05, "train/loss": 
1.0381834506988525, "train/param_norm": 1186.01953125, "_runtime": 6204, "_timestamp": 1659195568, "_step": 1000, "_wandb": {"runtime": 6205}} \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log b/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..37892c0e7c9bab0ce2bdb4f9a1a97b3c8080bc6f --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log @@ -0,0 +1,2398 @@ +2022-07-30 13:56:05,186 INFO MainThread:2277729 [internal.py:wandb_internal():87] W&B internal server running at pid: 2277729, started at: 2022-07-30 13:56:05.186478 +2022-07-30 13:56:05,188 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 13:56:05,188 INFO WriterThread:2277729 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb +2022-07-30 13:56:05,189 DEBUG SenderThread:2277729 [sender.py:send():234] send: header +2022-07-30 13:56:05,189 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: check_version +2022-07-30 13:56:05,227 DEBUG SenderThread:2277729 [sender.py:send():234] send: run +2022-07-30 13:56:05,409 INFO SenderThread:2277729 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files +2022-07-30 13:56:05,409 INFO SenderThread:2277729 [sender.py:_start_run_threads():804] run started: y1b5rbiq with start time 1659189364 +2022-07-30 13:56:05,409 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 13:56:05,409 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 13:56:05,410 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:56:06,413 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 13:56:07,787 DEBUG HandlerThread:2277729 [meta.py:__init__():40] meta init +2022-07-30 13:56:07,788 DEBUG HandlerThread:2277729 [meta.py:__init__():54] meta init done +2022-07-30 13:56:07,788 DEBUG HandlerThread:2277729 [meta.py:probe():214] probe +2022-07-30 13:56:07,789 DEBUG HandlerThread:2277729 [meta.py:_setup_git():204] setup git +2022-07-30 13:56:07,831 DEBUG HandlerThread:2277729 [meta.py:_setup_git():211] setup git done +2022-07-30 13:56:07,831 DEBUG HandlerThread:2277729 [meta.py:_save_code():92] save code +2022-07-30 13:56:07,844 DEBUG HandlerThread:2277729 [meta.py:_save_code():113] save code done +2022-07-30 13:56:07,845 DEBUG HandlerThread:2277729 [meta.py:_save_patches():130] save patches +2022-07-30 13:56:07,921 DEBUG HandlerThread:2277729 [meta.py:_save_patches():172] save patches done +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:_save_pip():58] save pip +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:_save_pip():72] save pip done +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:probe():252] probe done +2022-07-30 13:56:07,926 DEBUG SenderThread:2277729 [sender.py:send():234] send: files +2022-07-30 13:56:07,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 13:56:07,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 13:56:07,927 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 13:56:07,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:07,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:08,396 INFO Thread-11 :2277729 [upload_job.py:push():137] Uploaded file 
/tmp/tmp17z1awz3wandb/3e9bk1i5-wandb-metadata.json +2022-07-30 13:56:08,418 INFO Thread-13 :2277729 [upload_job.py:push():137] Uploaded file /tmp/tmp17z1awz3wandb/3h7xlesk-diff.patch +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code +2022-07-30 13:56:08,624 INFO Thread-12 :2277729 [upload_job.py:push():137] Uploaded file /tmp/tmp17z1awz3wandb/2c1tu24r-code/run_flax_speech_recognition_ctc.py +2022-07-30 13:56:10,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:12,423 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:14,424 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:16,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:22,428 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:23,070 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:23,070 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:24,429 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:35,871 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:56:38,212 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:38,213 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:38,435 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:40,436 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:51,441 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:53,346 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:53,346 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:53,442 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:05,946 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:57:07,448 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:08,479 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:08,479 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:23,631 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:23,631 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:36,022 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:57:38,782 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:38,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:48,464 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:50,465 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:52,466 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:53,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:53,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:54,467 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:56,468 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:58,469 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:00,470 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:03,472 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:05,473 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:06,098 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:58:07,474 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:09,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:09,401 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:09,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:11,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:13,476 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:15,477 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:17,478 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:19,479 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:21,480 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:23,481 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:24,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:24,586 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:25,482 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:27,483 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:29,484 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:31,485 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:33,486 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:35,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:36,169 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:58:37,488 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:39,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:39,731 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:39,731 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:41,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:43,491 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:45,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:47,493 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:49,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:51,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:53,496 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:54,878 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:54,878 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:55,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:57,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:59,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:01,501 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:03,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:05,503 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:06,239 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:59:07,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:09,505 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:10,015 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:10,016 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:11,506 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:13,507 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:15,510 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:17,511 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:19,512 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:21,513 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:23,514 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:25,178 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:25,178 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 13:59:25,515 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:27,516 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:29,517 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:31,518 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:33,519 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:35,520 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:36,321 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:59:37,521 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:39,522 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:40,357 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:40,358 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:41,523 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:43,524 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:45,525 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:47,529 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:49,530 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:51,531 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:53,532 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:55,509 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:55,510 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:55,534 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:57,534 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:59,535 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:01,536 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:03,537 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:05,539 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:06,406 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:00:07,540 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:09,541 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:10,645 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:10,645 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:11,542 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:13,543 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:16,544 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:18,546 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:20,547 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:22,553 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:24,554 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:25,853 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:25,854 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:26,556 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:28,557 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:30,558 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:32,559 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:34,561 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:36,479 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:00:36,562 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:38,563 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:40,564 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:41,000 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:41,000 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:42,565 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:44,566 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:46,568 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:48,569 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:50,570 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:52,571 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:54,572 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:56,140 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:00:56,140 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:56,573 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:58,574 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:00,575 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:02,576 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:04,577 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:06,561 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:01:06,579 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:08,580 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:10,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:11,282 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:11,283 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:12,583 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:14,584 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:16,586 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:18,586 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:20,587 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:22,588 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:24,589 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:26,431 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:26,431 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:26,590 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:28,591 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:30,592 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:32,593 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:34,595 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:36,596 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:36,639 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:01:38,597 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:40,598 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:41,578 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:41,579 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:42,599 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:44,600 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:46,601 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:48,602 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:50,603 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:52,604 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:54,605 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:56,606 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:56,745 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:56,745 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:58,607 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:00,608 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:02,609 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:04,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:06,612 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:02:06,718 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:02:08,613 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:10,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:11,896 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:11,896 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:12,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:14,616 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:16,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:18,618 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:20,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:22,620 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:24,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:02:26,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:27,039 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:27,039 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:28,623 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:30,624 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:32,625 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:34,626 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:36,627 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:36,797 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:02:38,628 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:40,629 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:42,178 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:42,179 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:02:42,630 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:44,631 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:46,632 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:48,633 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:50,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:52,635 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:54,636 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:56,637 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:57,337 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:57,339 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:58,639 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:00,639 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:02,640 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:04,641 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:06,642 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:06,872 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:03:08,643 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:10,644 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:12,486 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:12,486 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:12,645 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:14,647 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:16,648 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:18,649 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:20,650 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:22,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:27,688 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:27,689 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:36,945 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:03:42,825 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:42,825 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:53,663 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:55,664 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:57,665 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:57,961 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:57,962 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:59,666 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:01,667 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:03,668 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:05,669 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:07,029 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:04:07,670 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:09,671 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:11,675 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:13,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:13,097 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:13,676 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:15,677 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:17,678 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:19,679 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:21,680 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:23,681 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:25,682 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:27,683 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:28,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:28,234 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:29,684 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:31,685 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:33,686 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:37,105 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:04:43,373 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:04:43,373 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:58,507 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:58,508 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:07,182 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:05:07,701 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:09,702 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:11,703 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:13,652 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:13,652 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:13,703 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:15,704 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:17,705 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:19,706 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:21,708 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:23,709 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:25,709 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:27,710 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:28,792 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:28,793 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:30,712 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:32,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:34,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:36,715 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:37,252 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:05:40,717 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:42,718 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:43,951 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:43,952 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:44,719 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:46,720 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:48,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:50,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:52,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:54,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:56,724 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:58,725 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:59,091 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 14:05:59,092 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:00,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:02,727 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:04,728 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:06,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:07,337 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:06:08,730 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:10,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:12,734 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:14,247 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:14,248 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:14,734 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:16,735 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:18,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:20,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:22,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:24,740 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:26,740 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:28,742 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:29,397 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:29,397 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:37,413 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:06:44,536 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:44,537 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:59,669 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:59,670 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:07:07,490 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:07:10,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:14,981 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:14,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:07:19,765 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:25,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:30,164 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:30,164 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:07:31,770 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:37,560 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:07:37,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:44,776 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:45,305 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:45,305 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 14:07:46,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:50,778 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:54,780 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:56,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:00,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:00,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:04,785 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:07,629 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:08:15,594 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:15,594 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:30,756 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:30,756 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:33,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:37,702 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:08:41,800 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:43,801 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:45,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:45,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:01,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:01,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:07,774 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:09:16,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:16,245 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:31,388 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:31,389 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:37,845 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:09:46,522 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:46,523 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:01,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:01,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:07,919 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:10:16,795 DEBUG 
HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:16,795 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:31,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:31,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:37,998 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:10:47,069 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:47,069 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:02,209 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:02,210 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:08,072 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:11:17,348 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:17,348 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:32,498 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:32,499 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:38,151 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:11:47,630 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:47,630 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:02,800 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:02,800 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:12:08,343 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:12:17,934 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:17,935 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:24,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:12:33,098 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:33,099 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:38,420 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:12:48,261 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:48,262 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:03,672 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:03,672 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:08,494 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:13:18,830 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:18,830 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:33,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:33,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:38,572 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:13:49,120 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 14:13:49,120 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:04,252 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:04,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:08,648 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:14:19,390 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:19,390 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:34,525 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:34,525 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:38,722 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:14:49,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:49,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:04,815 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:04,815 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:08,800 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:15:19,946 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:19,946 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:35,085 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:35,086 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:38,877 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:15:50,224 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:50,224 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:05,364 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:05,365 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:08,949 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:16:20,501 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:20,501 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:35,638 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:35,638 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:39,026 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:16:48,017 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:16:50,838 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:50,839 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:06,045 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:06,045 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:09,100 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:17:21,243 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:21,243 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:36,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:36,402 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:39,173 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:17:51,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:51,550 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:06,684 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:06,684 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:09,249 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:18:21,820 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:21,821 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:36,960 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:36,960 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:39,326 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:18:52,093 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:52,093 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:07,239 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:07,240 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:09,400 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats 
+2022-07-30 14:19:22,441 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:22,441 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:37,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:37,587 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:39,466 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:19:52,726 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:52,727 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:07,858 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:07,859 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:09,534 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:20:22,993 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:22,994 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:38,130 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:38,130 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:39,629 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:20:53,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:53,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:08,423 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:08,423 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:09,734 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:21:16,129 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:21:23,574 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:23,574 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:38,737 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:38,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:39,804 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:21:53,901 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:53,902 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:09,073 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:09,073 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:09,872 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:22:24,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:24,235 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:39,371 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:39,371 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:39,935 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:22:54,505 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:54,505 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:09,643 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:09,643 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:09,999 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:23:24,778 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:24,778 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:39,924 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:39,924 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:40,068 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:23:55,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:55,059 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:10,143 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:24:10,193 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:10,193 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:25,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:25,327 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:40,218 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:24:40,461 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:24:40,462 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:55,601 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:55,602 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:10,310 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:25:10,737 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:10,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:25,886 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:25,887 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:40,387 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:25:41,746 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:41,746 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:43,250 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:47,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:53,254 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:56,987 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:56,987 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:58,257 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:26:04,259 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:26:10,454 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:26:12,307 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:12,307 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:27,768 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:27,768 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:40,524 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:26:42,936 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:42,936 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:58,101 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:58,101 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:10,594 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:27:13,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:13,246 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:28,379 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:28,379 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:40,665 DEBUG SenderThread:2277729 
[sender.py:send():234] send: stats +2022-07-30 14:27:43,518 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:43,518 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:58,654 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:58,655 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:10,735 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:28:13,818 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:13,818 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:28,953 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:28,953 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:40,809 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:28:44,095 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:44,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:59,232 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:59,233 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:10,882 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:29:14,366 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:14,367 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:29,499 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:29:29,499 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:40,958 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:29:44,666 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:44,667 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:59,804 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:59,805 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:11,042 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:30:14,949 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:14,950 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:25,365 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:29,366 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:30,225 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:30,226 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:35,369 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:39,370 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:41,121 DEBUG SenderThread:2277729 [sender.py:send():234] send: 
stats +2022-07-30 14:30:43,372 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:45,590 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:45,590 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:47,374 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:51,375 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:56,378 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:58,379 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:31:00,911 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:00,911 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:02,380 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:31:11,194 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:31:16,224 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:16,225 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:31,380 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:31:31,380 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:41,269 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:31:46,552 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:46,553 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:01,698 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:01,698 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:11,345 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:32:16,831 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:16,832 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:31,965 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:31,965 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:41,420 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:32:47,101 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:47,102 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:02,236 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:02,236 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:11,495 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:33:17,376 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:17,376 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:32,510 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:32,510 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:41,569 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:33:47,652 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:47,652 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:02,782 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:02,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:11,645 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:34:17,915 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:17,916 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:33,062 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:33,062 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:41,717 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:34:48,201 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:48,201 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:03,335 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:03,336 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:11,801 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:35:18,469 
DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:18,469 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:33,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:33,709 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:33,709 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:37,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:40,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:41,881 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:35:44,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:48,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:48,998 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:48,998 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:50,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:54,497 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:56,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:58,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:02,500 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:04,253 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:04,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:04,501 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:06,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:10,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:11,952 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:36:19,604 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:19,604 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:34,765 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:34,765 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:42,025 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:36:49,926 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:49,927 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:05,074 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:05,074 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:12,097 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:37:20,211 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:20,211 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:35,343 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:35,344 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:42,174 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:37:50,476 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:50,476 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:05,614 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:05,615 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:12,248 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:38:20,748 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:20,749 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:35,886 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 14:38:35,886 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:42,325 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:38:51,015 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:51,016 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:06,154 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:06,154 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:12,400 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:39:21,670 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:21,671 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:36,813 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:36,814 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:42,474 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:39:51,956 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:51,956 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:07,098 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:07,099 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:12,553 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:40:22,240 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:22,240 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:40:26,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:31,612 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:33,613 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:35,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:37,427 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:37,427 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:37,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:39,616 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:41,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:42,625 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:40:43,618 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:45,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:47,620 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:49,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:51,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:52,614 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:52,615 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:07,797 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:07,797 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:12,701 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:41:22,967 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:22,967 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:38,136 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:38,136 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:42,775 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:41:53,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:53,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:08,415 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:08,415 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:12,853 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:42:23,548 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:23,548 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:38,684 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:38,685 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:42,930 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:42:53,846 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:53,847 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:08,983 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:08,983 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:13,006 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:43:24,120 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:24,121 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:39,257 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:39,257 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:43,081 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:43:54,415 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:43:54,415 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:09,551 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:09,552 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:13,159 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:44:24,699 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:24,699 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:39,837 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:39,838 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:43,231 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:44:54,971 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:54,971 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:01,720 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:03,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:06,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:10,702 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:10,702 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:13,304 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:45:16,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:24,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:26,133 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:26,133 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:30,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:38,735 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:41,402 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:41,403 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:42,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:43,376 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:45:49,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:55,741 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:56,757 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 14:45:56,758 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:59,743 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:05,746 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:09,747 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:12,002 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:12,003 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:13,442 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:46:15,750 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:19,751 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:24,754 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:27,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:27,234 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:28,755 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:46:32,757 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:36,759 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:40,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:42,592 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:42,593 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:43,509 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:46:44,762 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:48,764 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:53,766 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:55,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:57,810 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:57,810 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:59,769 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:03,770 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:05,771 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:09,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:13,100 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:13,100 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:13,578 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:47:13,775 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:15,776 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:19,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:21,778 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:23,779 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:27,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:28,297 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:28,297 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:29,782 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:34,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:36,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:38,785 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:40,786 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:42,787 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:43,506 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:43,506 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:43,650 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:47:44,788 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:46,789 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:48,790 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:50,791 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:52,792 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:54,793 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:56,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:58,671 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:58,671 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:58,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:00,795 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:01,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:02,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:13,721 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:48:13,800 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:13,955 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:13,955 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:19,803 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:27,806 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:29,259 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:29,259 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:33,811 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:39,813 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:43,793 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:48:44,548 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:44,548 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:46,816 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:50,818 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:56,821 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:59,831 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:59,832 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:00,823 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:04,825 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:10,827 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:13,866 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:49:14,829 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:15,610 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:15,611 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:18,831 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:22,833 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:27,835 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:30,935 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:30,935 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:31,837 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:35,838 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:39,840 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:43,841 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:43,941 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:49:46,133 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:46,133 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:49,844 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:53,846 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:49:57,848 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:59,849 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:01,321 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:01,322 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:03,850 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:08,852 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:10,853 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:14,012 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:50:14,855 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:16,594 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:16,594 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:16,856 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:20,858 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:22,859 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:24,859 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:26,860 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:30,862 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:31,864 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:31,864 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:32,863 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:34,864 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:36,865 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:38,866 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:40,867 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:50:42,868 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:44,086 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:50:44,869 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:46,869 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:47,060 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:47,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:48,870 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:51,872 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:53,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:55,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:57,874 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:59,875 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:51:01,876 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:02,375 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:02,375 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:10,880 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:14,159 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:51:17,861 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:17,861 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:18,884 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:24,886 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:33,068 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:33,069 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:34,890 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:40,893 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:44,231 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:51:46,895 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:48,267 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:48,267 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:51,898 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:57,900 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:03,544 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:03,544 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:05,903 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:09,905 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:13,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:14,307 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:52:17,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:18,815 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:18,815 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:21,910 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:25,912 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:29,913 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:34,127 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:34,127 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:34,915 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:38,917 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:42,919 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:44,382 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:52:46,921 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:49,399 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:49,400 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:50,923 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:54,925 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:56,925 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:00,927 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:02,928 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:04,675 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:04,675 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:06,930 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:09,932 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:13,933 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:14,456 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:53:15,934 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:19,879 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:53:19,880 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:19,936 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:24,938 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:26,939 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:28,940 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:32,942 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:34,943 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:35,075 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:35,076 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:36,944 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:38,945 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:40,946 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:42,947 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:44,532 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:53:44,948 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:46,949 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:48,950 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:50,284 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:50,285 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:50,951 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:52,952 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:54,952 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:56,953 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:58,954 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:59,864 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 14:53:59,868 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 14:53:59,872 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 14:53:59,955 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 14:54:00,955 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:01,956 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:05,540 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:05,541 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:11,960 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:14,610 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:54:17,963 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:20,988 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:20,989 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:25,966 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:31,968 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:36,301 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:36,301 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:38,971 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:42,973 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:44,686 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:54:48,976 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:51,565 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:51,566 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:52,977 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:58,980 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:02,982 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:06,891 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:06,892 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:07,984 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:13,987 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:14,761 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:55:17,989 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:21,991 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:22,184 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:22,185 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:23,992 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:27,993 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:31,995 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:35,997 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:55:37,439 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:37,439 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:39,999 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:44,001 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:44,836 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:55:49,003 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:51,004 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:52,781 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:52,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:55,005 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:57,006 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:01,008 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:03,009 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:07,011 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:08,034 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:08,034 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:09,012 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:13,014 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:14,910 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:56:15,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:17,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:21,017 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:23,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:23,245 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:24,018 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:26,019 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:28,020 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:30,021 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:32,022 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:34,023 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:36,024 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:38,025 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:38,430 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:38,431 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:40,026 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:42,027 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:44,028 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:44,987 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:56:46,029 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:48,030 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:50,031 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:53,725 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:53,725 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:02,035 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:09,039 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:09,147 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:09,147 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:15,066 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:57:17,042 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:23,044 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:24,449 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:24,450 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:29,047 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:34,049 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:39,734 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:39,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:40,052 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:44,053 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:45,141 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:57:50,056 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:54,058 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:55,077 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:55,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:59,060 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:03,062 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:07,064 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:10,366 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:10,366 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:11,066 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:15,068 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:15,221 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:58:19,069 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:23,071 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:25,640 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:25,640 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:27,073 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 14:58:31,075 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:36,077 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:38,078 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:40,876 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:40,876 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:42,080 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:45,297 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:58:46,082 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:48,083 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:52,085 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:54,086 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:56,105 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:56,105 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:58,088 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:00,088 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:02,089 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:06,091 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:08,092 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:10,093 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:11,382 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:11,382 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:12,094 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:15,096 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:15,370 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:59:17,097 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:19,098 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:21,099 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:23,100 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:25,101 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:26,589 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:26,589 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:27,102 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:29,103 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:31,104 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:33,105 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:35,106 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:59:37,107 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:39,108 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:39,561 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 14:59:39,564 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 14:59:39,567 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 14:59:40,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 14:59:41,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:41,773 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:41,773 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:45,446 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:59:51,113 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:57,156 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:57,156 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:02,118 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:12,123 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:12,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:12,443 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:15,517 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:00:18,125 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:27,897 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:27,897 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:28,130 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:34,133 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:38,134 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:42,136 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:43,207 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:43,207 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:45,585 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:00:49,139 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:57,143 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:58,493 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:58,494 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:01,145 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:05,147 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:09,148 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:13,150 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:13,789 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:13,790 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:15,657 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:01:20,153 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:24,155 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:01:27,156 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:29,080 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:29,081 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:31,158 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:37,161 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:41,163 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:44,321 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:44,321 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:45,165 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:45,736 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:01:49,166 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:51,167 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:55,169 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:57,174 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:59,557 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:59,557 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:01,175 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:05,177 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:07,178 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:10,179 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:12,180 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:14,825 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:14,826 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:15,834 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:02:16,182 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:18,183 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:22,185 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:24,186 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:26,187 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:28,188 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:30,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:30,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:30,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:32,190 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:34,191 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:36,192 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:38,193 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:40,194 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:42,195 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:44,196 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:45,221 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:45,221 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:45,922 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:02:50,198 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:53,200 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:00,663 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:00,663 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:01,203 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:09,206 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:15,209 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:15,997 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:03:16,048 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:16,048 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:21,211 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:28,214 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:31,294 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:31,295 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:34,217 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:38,219 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:44,222 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:46,072 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:03:46,536 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:46,536 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:48,223 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:52,225 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:59,229 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:01,717 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:01,717 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:03,230 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:07,232 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:11,234 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:15,236 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:16,147 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:04:17,008 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:17,008 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:19,238 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:04:21,239 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:25,241 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:30,243 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:32,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:32,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:34,245 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:38,246 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:40,247 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:44,249 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:46,222 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:04:46,250 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:47,486 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:47,486 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:04:50,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:52,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:56,254 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:58,255 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:02,257 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:02,667 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:02,667 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:04,258 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:07,259 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:09,260 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:11,261 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:15,263 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:16,300 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:05:17,264 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:17,882 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:17,882 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:19,265 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:21,266 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:23,267 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:25,267 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:27,268 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:29,269 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:31,270 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:33,034 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:33,035 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:33,271 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:35,272 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:37,273 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:37,919 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:05:37,923 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:05:37,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:05:38,274 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:05:39,274 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:46,373 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:05:48,372 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:48,373 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:49,278 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:56,281 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:02,284 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:03,721 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:03,722 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:08,287 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:14,289 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:16,448 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:06:19,023 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:19,023 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:20,292 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:24,294 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:31,297 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:34,268 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:34,269 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:06:35,299 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:41,301 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:45,303 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:46,522 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:06:49,305 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:49,515 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:49,515 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:53,307 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:57,309 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:02,310 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:04,832 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:04,833 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:06,312 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:10,314 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:14,316 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:16,317 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:16,595 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:07:20,076 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:20,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:20,319 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:24,320 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:26,321 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:30,323 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:32,324 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:35,982 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:07:35,982 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:37,326 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:39,327 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:43,329 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:45,330 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:46,670 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:07:49,332 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:51,168 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:51,168 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:51,333 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:53,334 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:55,334 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:59,336 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:01,337 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:03,338 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:05,339 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:06,355 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:06,355 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:07,340 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:09,341 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:11,342 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:13,343 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:15,344 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:16,746 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:08:17,345 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:19,346 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:21,347 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:21,575 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:21,576 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:23,348 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:25,349 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:36,864 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:36,864 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:37,354 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:43,357 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:46,823 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:08:52,347 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:52,347 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:08:52,361 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:58,363 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:02,365 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:07,646 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:07,647 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:08,368 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:12,369 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:16,900 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:09:19,372 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:22,941 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:22,941 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:23,374 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:27,376 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:33,379 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:37,381 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:38,261 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:38,261 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:41,382 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:45,384 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:46,978 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:09:49,386 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:53,469 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:53,470 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:54,389 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:58,390 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:00,392 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:04,393 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:08,395 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:08,735 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:08,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:12,397 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:14,398 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:17,055 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:10:18,400 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:22,402 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:24,051 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:24,051 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:24,402 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:10:28,404 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:30,405 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:32,406 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:37,408 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:39,290 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:39,291 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:39,409 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:41,410 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:45,412 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:47,133 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:10:47,413 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:49,414 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:10:51,415 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:53,416 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:54,480 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:54,480 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:55,417 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:57,418 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:59,419 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:01,420 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:03,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:05,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:07,423 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:09,424 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:09,660 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:09,660 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:11,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:13,417 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:11:13,421 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:11:13,424 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:11:13,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:11:13,426 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:15,426 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:17,207 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:11:24,430 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:24,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:24,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:32,433 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:39,436 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:40,456 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:40,457 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:45,439 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:47,283 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:11:49,441 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:55,443 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:55,697 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:55,698 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:02,447 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:06,448 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:10,991 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:10,992 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:12,451 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:16,453 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:17,359 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:12:20,455 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:24,456 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:26,422 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:26,422 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:28,458 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:33,460 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:37,462 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:41,464 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:41,621 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:41,621 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 15:12:45,466 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:47,434 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:12:49,468 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:51,469 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:55,470 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:56,891 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:56,891 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:59,472 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:03,474 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:05,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:09,477 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:11,478 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:13:12,122 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:12,122 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:16,480 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:17,510 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:13:18,481 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:22,483 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:24,484 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:26,485 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:27,315 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:27,315 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:30,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:32,488 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:34,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:36,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:38,491 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:40,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:42,493 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:42,570 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:42,571 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:44,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:46,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:47,587 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:13:48,496 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:50,497 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:52,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:54,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:56,500 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:57,800 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:57,801 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:59,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:01,503 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:03,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:11,507 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:13,137 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:13,138 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:17,661 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:14:19,511 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:25,513 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:28,386 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:28,387 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:32,516 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:38,519 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:42,521 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:43,657 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:43,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:47,735 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:14:48,523 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:52,525 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:58,528 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:58,920 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:58,921 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:15:03,530 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:07,531 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:11,533 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:14,108 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:14,109 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:15,535 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:17,809 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:15:21,538 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:25,540 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:27,541 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:29,340 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:29,341 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:32,543 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:36,544 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:40,546 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:42,547 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:44,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:44,549 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:46,549 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:47,885 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:15:50,551 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:54,553 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:56,554 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:59,798 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:59,798 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:00,556 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:02,557 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:04,558 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:08,560 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:11,561 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:15,040 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:15,040 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:15,563 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:17,564 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:17,953 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:16:19,565 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:21,566 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:23,567 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:27,569 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:30,738 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:30,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:31,571 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:33,572 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:37,574 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:39,575 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:41,576 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:43,577 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:45,578 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:45,945 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:45,945 
DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:47,579 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:48,016 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:16:49,580 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:50,678 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:16:50,681 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:16:50,684 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:16:51,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:16:51,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:53,582 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:01,248 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:01,248 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:01,585 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:10,590 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:16,592 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:16,637 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:16,637 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:18,081 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:17:22,595 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:28,597 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:31,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:31,978 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:33,600 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:37,601 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:43,604 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:47,303 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:47,303 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:47,606 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:48,151 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:17:53,608 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:57,610 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:01,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:02,588 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:02,588 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:06,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:10,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:14,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:17,854 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:17,854 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:18,221 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:18:18,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:22,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:24,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:28,624 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:32,626 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:33,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:33,059 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:36,627 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:38,628 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:42,630 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:45,631 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:48,294 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:18:48,298 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:18:48,299 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:49,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:51,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:55,636 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:57,637 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:01,639 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:03,514 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:03,514 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:03,640 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:05,641 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:07,642 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:11,644 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:13,645 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:15,646 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:17,648 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:18,371 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:19:18,687 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:18,688 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:19,649 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:21,650 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:23,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:25,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:27,652 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:29,653 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:31,654 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:33,655 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:33,870 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:33,871 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:35,656 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:38,657 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:48,445 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:19:48,662 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:49,134 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:49,134 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:56,665 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:04,605 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:04,605 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:05,670 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:11,672 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:18,520 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:20:19,884 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:19,885 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:21,677 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:27,679 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:31,681 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:35,097 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:35,098 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:37,684 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:46,688 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:48,595 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:20:50,378 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:20:50,378 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:50,690 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:54,692 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:58,694 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:02,696 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:05,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:05,550 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:06,698 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:11,700 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:15,702 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:18,672 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:21:19,704 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:20,763 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:20,763 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:23,706 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:29,708 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:33,710 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:35,980 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:35,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:37,712 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:39,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:43,715 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:45,716 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:48,748 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:21:50,718 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:21:51,217 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:51,218 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:52,719 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:56,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:58,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:00,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:04,725 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:06,443 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:06,443 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:06,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:08,727 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:12,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:14,730 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:16,731 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:18,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:18,817 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:22:20,733 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:21,645 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:21,645 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:22,735 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:24,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:26,737 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:28,738 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:30,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:32,740 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:35,742 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:36,843 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:36,843 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:37,743 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:39,744 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:40,772 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:22:40,775 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:22:40,778 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:22:41,745 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:22:41,745 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:43,746 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:48,884 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:22:52,045 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 15:22:52,045 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:52,750 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:58,753 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:06,756 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:07,473 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:07,474 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:10,758 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:16,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:18,953 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:23:22,806 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:22,806 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:23,764 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:29,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:33,769 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:37,771 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:38,146 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:38,147 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:43,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:47,775 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:49,023 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:23:51,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:53,354 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:53,354 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:55,779 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:59,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:03,782 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:24:08,587 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:08,588 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:08,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:12,786 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:16,789 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:18,790 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:19,093 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:24:22,792 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:23,901 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:23,901 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:26,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:32,797 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:34,798 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:38,800 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:39,124 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:39,124 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:40,801 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:44,804 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:46,805 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:48,806 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:49,164 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:24:52,808 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:54,410 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:54,411 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:54,809 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:56,810 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:00,812 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:02,813 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:04,814 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:06,815 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:09,570 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:09,570 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:09,816 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:11,817 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:13,819 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:15,820 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:17,821 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:19,236 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:25:19,822 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:21,823 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:23,824 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:24,729 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:24,730 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:25,825 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:27,826 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:29,828 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:31,829 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:39,832 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:40,055 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:25:40,055 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:46,836 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:49,308 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:25:52,838 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:55,467 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:55,468 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:58,841 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:06,845 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:10,786 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:10,786 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:10,846 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:16,849 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:19,377 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:26:21,851 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:26,076 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:26,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:27,854 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:31,856 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:35,858 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:39,860 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:41,341 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:41,342 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:43,861 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:47,863 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:49,450 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:26:51,865 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:55,867 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:00,870 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:01,432 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:01,432 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:04,871 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:08,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:12,875 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:14,877 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:16,960 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:16,960 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:18,878 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:19,525 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:27:22,881 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:27:24,882 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:28,883 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:30,885 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:32,136 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:32,137 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:32,886 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:36,888 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:38,889 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:42,891 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:44,892 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:47,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:47,328 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:47,893 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:49,601 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:27:49,894 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:53,896 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:55,897 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:57,898 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:59,899 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:01,900 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:02,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:02,586 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:03,901 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:05,902 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:07,903 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:09,904 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:11,905 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:13,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:15,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:16,726 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:28:16,730 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:28:16,734 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:28:16,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:28:17,909 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:17,981 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:17,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:18,909 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:28:19,676 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:28:27,914 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:33,216 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:33,217 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:34,917 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:42,921 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:48,564 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:48,564 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:48,923 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:49,750 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:28:54,926 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:58,928 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:03,830 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:03,831 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:05,931 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:09,933 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:15,936 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:19,089 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:19,090 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:19,825 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:29:19,937 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:23,939 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:27,941 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:32,943 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:34,344 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:34,345 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:36,944 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:29:40,946 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:44,948 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:48,950 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:49,680 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:49,680 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:49,900 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:29:52,951 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:56,953 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:58,954 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:02,956 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:04,954 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:04,954 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:07,959 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:09,960 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:13,962 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:15,963 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:19,965 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:19,978 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:30:20,252 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:20,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:21,966 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:25,967 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:27,969 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:29,970 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:31,971 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:35,502 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:35,503 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:35,972 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:37,974 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:39,974 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:41,975 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:43,976 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:45,977 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:47,978 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:49,979 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:50,054 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:30:50,682 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:30:50,682 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:51,980 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:53,981 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:55,982 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:58,983 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:00,984 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:02,985 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:04,986 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:06,000 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:06,000 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:06,988 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:14,991 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:20,129 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:31:21,339 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:21,339 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:22,995 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:29,998 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:36,001 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:36,649 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:36,649 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:40,003 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:46,005 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:50,205 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:31:51,961 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:51,961 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:53,009 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:59,012 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:03,014 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:07,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:07,213 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:07,213 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:14,018 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:18,020 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:20,280 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:32:22,022 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:22,483 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:22,484 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:26,024 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:30,026 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:34,027 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:37,761 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:37,761 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:38,029 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:40,030 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:44,032 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:49,034 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:50,355 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:32:53,036 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:53,074 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:53,074 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:55,037 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:32:59,039 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:03,041 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:05,042 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:07,043 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:08,269 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:08,269 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:11,045 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:13,046 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:17,048 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:19,049 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:20,429 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:33:22,051 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:33:23,519 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:23,520 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:26,053 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:28,054 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:30,055 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:32,056 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:34,057 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:36,058 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:38,059 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:38,720 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:38,720 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:40,060 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:42,061 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:44,062 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:46,063 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:48,064 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:50,065 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:50,503 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:33:52,066 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:53,682 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:33:53,686 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:33:53,690 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:33:54,067 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:33:54,067 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:54,464 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:54,464 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:56,068 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:05,073 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:09,786 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:09,786 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:13,076 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:19,079 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:20,568 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:34:25,081 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:25,333 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:25,333 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:32,084 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:36,086 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:40,632 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:40,632 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:42,089 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:46,090 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:50,092 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:50,635 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:34:55,952 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:55,952 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:56,095 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:00,097 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:04,098 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:09,100 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:11,193 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:11,193 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:35:13,102 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:17,104 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:20,704 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:35:21,105 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:25,107 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:26,383 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:26,384 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:29,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:33,111 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:35,111 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:40,113 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:41,617 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:41,617 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:44,115 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:46,116 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:50,118 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:50,781 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:35:52,119 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:56,120 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:56,840 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:56,841 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:58,121 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:02,123 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:04,124 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:06,125 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:08,126 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:12,060 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:12,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:12,127 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:14,128 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:17,130 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:19,131 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:20,858 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:36:21,132 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:23,132 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:25,133 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:27,134 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:27,232 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:27,232 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:29,135 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:31,136 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:33,137 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:35,138 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:37,139 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:39,140 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:41,141 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:42,517 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:42,517 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:50,932 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:36:53,146 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:57,731 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:57,732 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:00,149 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:06,151 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:12,153 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:13,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:13,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:18,156 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:21,008 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:37:24,158 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:28,160 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:28,479 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:28,479 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:37:35,163 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:39,165 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:43,166 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:43,769 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:43,770 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:47,168 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:51,086 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:37:51,170 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:55,171 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:59,085 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:59,085 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:59,173 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:03,175 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:08,177 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:12,179 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:14,347 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:14,348 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:16,180 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:20,182 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:21,161 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:38:22,183 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:26,184 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:29,576 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:29,577 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:30,186 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:32,187 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:36,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:38,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:42,191 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:44,192 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:44,811 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:44,812 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:46,193 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:50,194 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:51,234 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:38:52,195 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:55,196 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:59,198 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:00,026 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:39:00,026 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:39:01,199 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:03,200 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:05,201 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:07,202 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:09,203 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:11,204 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:13,205 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:15,206 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:15,215 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:39:15,215 
DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:39:17,207 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:19,208 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:21,309 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:39:23,210 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:25,211 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:27,212 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:28,835 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:39:28,838 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:28,841 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:29,180 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:39:29,183 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:29,186 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:29,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:29,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:31,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:31,395 DEBUG SenderThread:2277729 [sender.py:send():234] send: telemetry +2022-07-30 15:39:31,395 DEBUG SenderThread:2277729 [sender.py:send():234] send: exit +2022-07-30 15:39:31,395 INFO SenderThread:2277729 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 15:39:31,397 INFO SenderThread:2277729 [sender.py:send_exit():368] handling runtime: 6205 +2022-07-30 15:39:31,399 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:31,399 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:31,400 INFO SenderThread:2277729 [sender.py:send_exit():374] send defer +2022-07-30 15:39:31,400 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:31,400 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,401 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 15:39:31,401 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,401 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 15:39:31,401 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 1 +2022-07-30 15:39:31,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,401 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 15:39:31,435 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,435 INFO 
SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 15:39:31,435 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 2 +2022-07-30 15:39:31,435 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:39:31,435 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,436 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 15:39:31,436 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,436 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 15:39:31,436 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 3 +2022-07-30 15:39:31,436 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,436 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 15:39:31,438 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:31,441 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:31,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 4 +2022-07-30 15:39:31,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,442 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 15:39:31,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 
15:39:31,502 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:31,657 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 5 +2022-07-30 15:39:31,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:31,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,658 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 15:39:31,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,658 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 15:39:31,658 INFO SenderThread:2277729 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 15:39:31,759 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,214 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:32,214 INFO SenderThread:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml config.yaml +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch diff.patch +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt requirements.txt +2022-07-30 15:39:32,216 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log output.log +2022-07-30 15:39:32,216 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json wandb-summary.json +2022-07-30 15:39:32,217 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json wandb-metadata.json +2022-07-30 15:39:32,225 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 15:39:32,225 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 6 +2022-07-30 15:39:32,225 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,229 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:32,229 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 15:39:32,229 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:32,229 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 15:39:32,229 INFO SenderThread:2277729 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 15:39:32,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,328 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,429 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,430 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,531 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,532 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,633 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,634 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,730 INFO Thread-15 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt +2022-07-30 15:39:32,735 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,741 INFO Thread-14 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml +2022-07-30 15:39:32,764 INFO Thread-17 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:32,837 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,837 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,922 INFO Thread-16 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:32,939 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 
15:39:32,939 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:33,041 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:33,041 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:33,123 INFO Thread-7 :2277729 [sender.py:transition_state():387] send defer: 7 +2022-07-30 15:39:33,123 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:33,123 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 15:39:33,124 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:33,124 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 15:39:33,143 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:34,381 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 8 +2022-07-30 15:39:34,382 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:34,382 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:34,383 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 15:39:34,383 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:34,383 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 15:39:34,383 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 9 +2022-07-30 15:39:34,383 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:34,384 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send():234] send: final 
+2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send():234] send: footer +2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:34,384 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 15:39:34,484 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:34,484 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:34,484 INFO SenderThread:2277729 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 15:39:34,744 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 15:39:34,749 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 15:39:34,750 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 15:39:34,750 INFO HandlerThread:2277729 [handler.py:finish():731] shutting down handler +2022-07-30 15:39:35,384 INFO WriterThread:2277729 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb +2022-07-30 15:39:35,742 INFO SenderThread:2277729 [sender.py:finish():1070] shutting down sender +2022-07-30 15:39:35,743 INFO SenderThread:2277729 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 15:39:35,743 INFO SenderThread:2277729 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 15:39:35,746 INFO MainThread:2277729 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log b/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b0afc77472903f407fa6605df693dbafae082e0d --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log @@ -0,0 +1,150 @@ +2022-07-30 13:56:04,286 INFO 
MainThread:2276371 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():404] calling init triggers +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():460] starting backend +2022-07-30 13:56:04,286 INFO MainThread:2276371 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 13:56:04,339 INFO MainThread:2276371 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 13:56:04,382 INFO MainThread:2276371 [backend.py:ensure_launched():221] started backend process with pid: 2277729 +2022-07-30 13:56:04,384 INFO MainThread:2276371 [wandb_init.py:init():469] backend started and connected +2022-07-30 13:56:04,398 INFO MainThread:2276371 [wandb_init.py:init():533] updated telemetry +2022-07-30 13:56:04,503 INFO MainThread:2276371 [wandb_init.py:init():563] communicating current version +2022-07-30 13:56:05,225 INFO MainThread:2276371 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 13:56:05,225 INFO MainThread:2276371 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 13:56:05,409 INFO MainThread:2276371 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 13:56:07,930 INFO MainThread:2276371 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 13:56:07,930 INFO MainThread:2276371 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 13:56:07,931 INFO MainThread:2276371 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 13:56:07,933 INFO MainThread:2276371 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 13:56:07,933 INFO MainThread:2276371 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 15:39:28,849 INFO MainThread:2276371 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 15:39:29,174 INFO MainThread:2276371 [wandb_run.py:_restore():1752] restore +2022-07-30 15:39:31,401 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 15:39:31,658 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 15:39:32,226 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 400083 +} + +2022-07-30 15:39:32,329 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 400083 +} + +2022-07-30 15:39:32,431 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,532 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,634 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,736 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,838 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,940 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:33,042 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:34,383 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:34,743 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} +local_info { +} + +2022-07-30 15:39:36,428 INFO MainThread:2276371 
[wandb_run.py:_append_history():2130] rendering history +2022-07-30 15:39:36,428 INFO MainThread:2276371 [wandb_run.py:_append_summary():2085] rendering summary +2022-07-30 15:39:36,429 INFO MainThread:2276371 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb b/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b0633fd56489a89073b8644ab56eb9287268163a --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db402d0277fed45bac85e9335ca1cef51f9db3b99416ba9b9db31b7b619b4d1d +size 1437107 diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..11df8fb90ea54a20f8f34bbb40442193e151ddc2 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1604 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. 
Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC + nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed) + nst[data_args.train_split_name] = nst_train["train"] + nst[data_args.eval_split_name] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + 
npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name: + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets[data_args.train_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets[data_args.eval_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and 
not training_args.do_predict: + raise ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # 
process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets[data_args.train_split_name]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name) + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in [data_args.test_split_name]: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + 
eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml b/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08ae650be81ea946dac3ad521619cd456d438730 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml @@ -0,0 +1,27 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659203166 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch b/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs 
diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/output.log b/wandb/run-20220730_174606-j2u4n7h4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..816e7ad2c23d6e51ba7659b648665cae3ac597b4 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/output.log @@ -0,0 +1,2576 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_17-46-02_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, 
+logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=bfloat16, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: bfloat16 +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.35it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 467.63it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'codevectors'), ('project_hid', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 18%|██████████████████████████████████▊ | 1683/9523 [00:00<00:00, 8421.50ex/s] +removing punctuation from train split #1: 8%|████████████████▏ | 781/9523 [00:00<00:01, 7808.41ex/s] +removing punctuation from train split #2: 9%|█████████████████▋ | 852/9523 [00:00<00:01, 8511.55ex/s] +removing punctuation from train split #3: 7%|████████████▉ | 621/9523 [00:00<00:01, 5886.15ex/s] +removing punctuation from train split #4: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 17:57:48.312630: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 17:57:48.312680: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00